#######################################################################################################
# PROJECT LECTURER: PROF. ADAM ZAGDANSKI                                                              #
# COURSE TITLE: DATA MINING                                                                           #
# PROJECT TOPIC: CLUSTER ANALYSIS OF BREAST CANCER CELL: AN APPLICATION OF PARTITION AND              #
#               HIERARCHICAL APPROACH                                                                 #
# STUDENTS AND ID: SEGUN LIGHT JEGEDE (257389) and ISAAC AKOJI PAUL (257388)                          #
#######################################################################################################
## LOADING THE DATA INTO R
#Read the data into R and give the variables readable names.
# Names for the 11 fields of the raw file, which is read without a header row.
col.names <- c(
  "id_number",
  "clump_thickness",
  "uniformity_cell_size",
  "uniformity_cell_shape",
  "marginal_adhesion",
  "single_epithelial_cell_size",
  "bare_nuclei",
  "bland_chromatin",
  "normal_nucleoli",
  "mitoses",
  "class"
)
# NOTE(review): absolute, machine-specific path - the script only runs where
# this exact file exists.
bcw <- read.csv(
  file = "C:/Users/jeged/Downloads/breast-cancer-wisconsin.data",
  header = FALSE,
  col.names = col.names
)
#View(bcw)
# NOTE(review): attach() places a snapshot of bcw on the search path; the
# conversions and row removal applied to bcw afterwards are not reflected in
# that snapshot. Kept only in case later code relies on bare column names.
attach(bcw)
## DATA PREPARATION AND CLEANING
library(DataExplorer)

#Check the data type of each column
str(bcw)
## 'data.frame':    699 obs. of  11 variables:
##  $ id_number                  : int  1000025 1002945 1015425 1016277 1017023 1017122 1018099 1018561 1033078 1033078 ...
##  $ clump_thickness            : int  5 5 3 6 4 8 1 2 2 4 ...
##  $ uniformity_cell_size       : int  1 4 1 8 1 10 1 1 1 2 ...
##  $ uniformity_cell_shape      : int  1 4 1 8 1 10 1 2 1 1 ...
##  $ marginal_adhesion          : int  1 5 1 1 3 8 1 1 1 1 ...
##  $ single_epithelial_cell_size: int  2 7 2 3 2 7 2 2 2 2 ...
##  $ bare_nuclei                : chr  "1" "10" "2" "4" ...
##  $ bland_chromatin            : int  3 3 3 3 3 9 3 3 1 2 ...
##  $ normal_nucleoli            : int  1 2 1 7 1 7 1 1 1 1 ...
##  $ mitoses                    : int  1 1 1 1 1 1 1 1 5 1 ...
##  $ class                      : int  2 2 2 2 2 4 2 2 2 2 ...
# Recode the diagnosis as a factor: "2" becomes benign and "4" malignant.
bcw$class <- as.factor(bcw$class)
levels(bcw$class)[levels(bcw$class) == "2"] <- "benign"
levels(bcw$class)[levels(bcw$class) == "4"] <- "malignant"
# Convert the nine feature columns to numeric one column at a time. lapply()
# keeps each column's own type on the way in, whereas apply() would first
# force the whole block into a single character matrix.
# NOTE(review): "?" is presumably the file's missing-value marker (bare_nuclei
# is read as character) - verify against the raw data. It is mapped to NA
# explicitly, so any *other* non-numeric token now raises a visible coercion
# warning instead of being silently hidden.
bcw[2:10] <- lapply(bcw[2:10], function(x) {
  x <- trimws(as.character(x))
  x[x %in% "?"] <- NA
  as.numeric(x)
})
# id_number is an identifier, not a quantity, so it is stored as text.
bcw$id_number <- as.character(bcw$id_number)
str(bcw) # every column should now have its intended type
## 'data.frame':    699 obs. of  11 variables:
##  $ id_number                  : chr  "1000025" "1002945" "1015425" "1016277" ...
##  $ clump_thickness            : num  5 5 3 6 4 8 1 2 2 4 ...
##  $ uniformity_cell_size       : num  1 4 1 8 1 10 1 1 1 2 ...
##  $ uniformity_cell_shape      : num  1 4 1 8 1 10 1 2 1 1 ...
##  $ marginal_adhesion          : num  1 5 1 1 3 8 1 1 1 1 ...
##  $ single_epithelial_cell_size: num  2 7 2 3 2 7 2 2 2 2 ...
##  $ bare_nuclei                : num  1 10 2 4 1 10 10 1 1 1 ...
##  $ bland_chromatin            : num  3 3 3 3 3 9 3 3 1 2 ...
##  $ normal_nucleoli            : num  1 2 1 7 1 7 1 1 1 1 ...
##  $ mitoses                    : num  1 1 1 1 1 1 1 1 5 1 ...
##  $ class                      : Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 1 1 1 ...
# Handle missing data: the affected rows are dropped, which is acceptable only
# because a small share of the observations is incomplete (see counts below).
t(introduce(bcw))
##                       [,1]
## rows                   699
## columns                 11
## discrete_columns         2
## continuous_columns       9
## all_missing_columns      0
## total_missing_values    16
## complete_rows          683
## total_observations    7689
## memory_usage         98752
sum(is.na(bcw)) # total number of missing cells in the data frame
## [1] 16
plot_intro(bcw)

plot_missing(bcw) 

bcw<-na.omit(bcw) # 16 of the 699 rows (2.29%) lack a bare_nuclei value; every row containing a missing value is removed.
sum(is.na(bcw))
## [1] 0
plot_missing(bcw) 

nrow(bcw) # 699 -> 683 rows, i.e. about 2.3% of the observations were removed
## [1] 683
## [1] 683
#View(bcw)
##EXPLORATORY DATA ANALYSIS
#Describing the Grouping Variable
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
# Tally the observations in each diagnosis class, then express every tally as
# a share of the total.
bcwnew <- mutate(
  summarize(group_by(bcw, class), count = n()),
  percentage = count / sum(count)
)
## `summarise()` ungrouping output (override with `.groups` argument)
# Bar chart of the class shares, with the percentage printed mid-bar.
ggplot(data = bcwnew, mapping = aes(x = class, y = percentage, fill = class)) +
  geom_col() +
  geom_text(
    aes(label = scales::percent(percentage)),
    position = position_stack(vjust = .5)
  ) +
  scale_y_continuous(labels = scales::percent)

#Describing the features
# Compute nine descriptive statistics for every column of `df`.
#
# Args:
#   df: a data frame (tibbles included) or a matrix whose columns are numeric.
#
# Returns:
#   A numeric matrix with one column per column of `df` (named after
#   names(df)) and nine rows: min, Q1, median, mean, Q3, max, var, sd, IQR.
#   A zero-column input yields a 9 x 0 matrix instead of an error.
my.summary <- function(df) {
  stat_names <- c("min", "Q1", "median", "mean", "Q3", "max", "var", "sd", "IQR")
  results <- matrix(NA_real_, nrow = length(stat_names), ncol = ncol(df))
  # seq_len() gives an empty loop for zero columns, where 1:ncol(df) would
  # iterate over c(1, 0) and fail.
  for (i in seq_len(ncol(df))) {
    # [[ ]] always extracts a plain vector from a data frame or tibble;
    # matrices still need [, i].
    x <- if (is.data.frame(df)) df[[i]] else df[, i]
    results[, i] <- c(
      min(x),
      quantile(x, 0.25, names = FALSE),
      median(x),
      mean(x),
      quantile(x, 0.75, names = FALSE),
      max(x),
      var(x),
      sd(x),
      IQR(x)
    )
  }
  rownames(results) <- stat_names
  colnames(results) <- names(df)
  results
}
ms<-my.summary(bcw[,2:10]) 
ms
##        clump_thickness uniformity_cell_size uniformity_cell_shape
## min           1.000000             1.000000              1.000000
## Q1            2.000000             1.000000              1.000000
## median        4.000000             1.000000              1.000000
## mean          4.442167             3.150805              3.215227
## Q3            6.000000             5.000000              5.000000
## max          10.000000            10.000000             10.000000
## var           7.956694             9.395113              8.931615
## sd            2.820761             3.065145              2.988581
## IQR           4.000000             4.000000              4.000000
##        marginal_adhesion single_epithelial_cell_size bare_nuclei
## min             1.000000                    1.000000    1.000000
## Q1              1.000000                    2.000000    1.000000
## median          1.000000                    2.000000    1.000000
## mean            2.830161                    3.234261    3.544656
## Q3              4.000000                    4.000000    6.000000
## max            10.000000                   10.000000   10.000000
## var             8.205717                    4.942109   13.277695
## sd              2.864562                    2.223085    3.643857
## IQR             3.000000                    2.000000    5.000000
##        bland_chromatin normal_nucleoli   mitoses
## min           1.000000        1.000000  1.000000
## Q1            2.000000        1.000000  1.000000
## median        3.000000        1.000000  1.000000
## mean          3.445095        2.869693  1.603221
## Q3            5.000000        4.000000  1.000000
## max          10.000000       10.000000 10.000000
## var           6.001013        9.318772  3.002160
## sd            2.449697        3.052666  1.732674
## IQR           3.000000        3.000000  0.000000
# Export the summary table as comma-separated text. The row names carry the
# statistic labels (min, Q1, ...), so they are written out; col.names = NA adds
# the blank header cell that keeps the header aligned with the data columns.
write.table(ms, file = "summary statistics.txt", sep = ",", quote = FALSE,
            row.names = TRUE, col.names = NA)

#Construct the plots in groups of up to four (2 x 2 grids)
#Construct the histograms
library(ggpubr)
# Overlaid per-class histograms of the nine features, arranged on 2 x 2 grids.
# The features take integer scores from 1 to 10 (see str() and the summary
# table), so one bin per score (binwidth = 1) replaces the default 30 bins;
# this removes the empty gaps between bars and the repeated
# "Pick better value with `binwidth`" messages.
plot_class_histogram <- function(feature) {
  # feature: name of a column of bcw, given as a string.
  ggplot(bcw, aes(x = .data[[feature]], fill = class)) +
    geom_histogram(position = "identity", alpha = 0.4, binwidth = 1) +
    labs(x = feature) # keep the plain column name as the axis title
}

# Panels A-D
ha <- plot_class_histogram("clump_thickness")
hb <- plot_class_histogram("uniformity_cell_size")
hc <- plot_class_histogram("uniformity_cell_shape")
hd <- plot_class_histogram("marginal_adhesion")
ggarrange(ha, hb, hc, hd, labels = c("A", "B", "C", "D"), ncol = 2, nrow = 2,
          common.legend = TRUE, legend = "bottom")

# Panels E-H
he <- plot_class_histogram("single_epithelial_cell_size")
hf <- plot_class_histogram("bare_nuclei")
hg <- plot_class_histogram("bland_chromatin")
hh <- plot_class_histogram("normal_nucleoli")
ggarrange(he, hf, hg, hh, labels = c("E", "F", "G", "H"), ncol = 2, nrow = 2,
          common.legend = TRUE, legend = "bottom")

# Panel I: a single plot placed on the same 2 x 2 grid so it keeps the size of
# the other panels.
hi <- plot_class_histogram("mitoses")
ggarrange(hi, labels = c("I"), ncol = 2, nrow = 2) #, common.legend = TRUE, legend="bottom")

# DataExplorer one-line alternative, kept for reference:
# plot_histogram(bcw)
# Density plots, three features per figure (columns selected by name).
plot_density(bcw[, c("clump_thickness", "uniformity_cell_size", "uniformity_cell_shape")])

plot_density(bcw[, c("marginal_adhesion", "single_epithelial_cell_size", "bare_nuclei")])

plot_density(bcw[, c("bland_chromatin", "normal_nucleoli", "mitoses")])

# Normal QQ plots for the same three groups of features.
plot_qq(bcw[, c("clump_thickness", "uniformity_cell_size", "uniformity_cell_shape")])

plot_qq(bcw[, c("marginal_adhesion", "single_epithelial_cell_size", "bare_nuclei")])

plot_qq(bcw[, c("bland_chromatin", "normal_nucleoli", "mitoses")])


# Box plots of every feature split by diagnosis class, on 2 x 2 grids.
plot_class_boxplot <- function(feature) {
  # feature: name of a column of bcw, given as a string.
  ggplot(bcw, aes(x = class, y = .data[[feature]])) +
    geom_boxplot(aes(color = class)) +
    scale_color_manual(values = c("#00AFBB", "#E7B800")) +
    labs(y = feature) # keep the plain column name as the axis title
}

# Panels A-D
bpa <- plot_class_boxplot("clump_thickness")
bpb <- plot_class_boxplot("uniformity_cell_size")
bpc <- plot_class_boxplot("uniformity_cell_shape")
bpd <- plot_class_boxplot("marginal_adhesion")
ggarrange(bpa, bpb, bpc, bpd, labels = c("A", "B", "C", "D"), ncol = 2, nrow = 2,
          common.legend = TRUE, legend = "bottom")

# Panels E-H
bpe <- plot_class_boxplot("single_epithelial_cell_size")
bpf <- plot_class_boxplot("bare_nuclei")
bpg <- plot_class_boxplot("bland_chromatin")
bph <- plot_class_boxplot("normal_nucleoli")
ggarrange(bpe, bpf, bpg, bph, labels = c("E", "F", "G", "H"), ncol = 2, nrow = 2,
          common.legend = TRUE, legend = "bottom")

# Panel I: single plot on the same 2 x 2 grid.
bpi <- plot_class_boxplot("mitoses")
ggarrange(bpi, labels = c("I"), ncol = 2, nrow = 2) #, common.legend = TRUE, legend="bottom")

'plot_boxplot(bcw, by="class")'
## [1] "plot_boxplot(bcw, by=\"class\")"
#Construct Boxplot without grouping
library(reshape)
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths
# Reshape to long format (one row per observation/feature pair) so that all
# features can share a single box-plot panel.
bcwData <- reshape::melt(bcw)
## Using id_number, class as id variables
# Wide bottom and left margins leave room for the rotated feature names.
par(mar = c(10, 7, 1, 1))
boxplot(value ~ variable, data = bcwData, las = 2)

#pairs(bcw[2:10], pch = 21, bg = c("#d95f02", "#7570b3")[unclass(bcw$class)])

plot_correlation(bcw, type = "continuous") #correlation plot

#We can also do some data preparation here: a measurement outside the range 1-10 (i.e. below the minimum or above the maximum) is an error
## NORMALIZATION
# Min-max normalisation: rescale a numeric vector linearly so that its minimum
# maps to 0 and its maximum to 1. A constant vector has zero range and yields
# NaN, exactly as the plain formula does.
min_max_scale <- function(x) {
  rng <- range(x)
  (x - rng[1]) / (rng[2] - rng[1])
}
# Normalised copy of the data: the identifier (renamed "ID") and the class
# pass through unchanged, and the nine feature columns are rescaled. Applying
# one helper to every column replaces nine hand-copied formulas and avoids
# creating single-letter globals such as `c`, which shadows base::c.
bcw1 <- data.frame(
  ID = bcw[, 1],
  lapply(bcw[, 2:10], min_max_scale),
  class = bcw[, 11]
)
str(bcw1)
## 'data.frame':    683 obs. of  11 variables:
##  $ ID                         : chr  "1000025" "1002945" "1015425" "1016277" ...
##  $ clump_thickness            : num  0.444 0.444 0.222 0.556 0.333 ...
##  $ uniformity_cell_size       : num  0 0.333 0 0.778 0 ...
##  $ uniformity_cell_shape      : num  0 0.333 0 0.778 0 ...
##  $ marginal_adhesion          : num  0 0.444 0 0 0.222 ...
##  $ single_epithelial_cell_size: num  0.111 0.667 0.111 0.222 0.111 ...
##  $ bare_nuclei                : num  0 1 0.111 0.333 0 ...
##  $ bland_chromatin            : num  0.222 0.222 0.222 0.222 0.222 ...
##  $ normal_nucleoli            : num  0 0.111 0 0.667 0 ...
##  $ mitoses                    : num  0 0 0 0 0 ...
##  $ class                      : Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 1 1 1 ...
#View(bcw1)

#Boxplot of the normalized data
# Long-format copy of the normalised data, drawn as one box per feature.
bcw1Data <- reshape::melt(bcw1)
## Using ID, class as id variables
# Wide bottom and left margins leave room for the rotated feature names.
par(mar = c(10, 7, 1, 1))
boxplot(value ~ variable, data = bcw1Data, las = 2)

## Clustering Start
library(stats)
library(cluster)
library("factoextra")
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(rgl)
library(scatterplot3d)
# Clustering input: the nine feature columns only (no identifier, no class).
bcw.features <- bcw[2:10]
# True diagnosis of every observation, set aside for later comparison.
bcw.real.class.labels <- bcw[[11]]
# Display label per observation: class followed by ID, separated by a space.
bcw.names <- paste(bcw$class, bcw$id_number)
## ACCESSING CLUSTERING TENDENCY
# Random reference data derived from the breast cancer features: each column
# is drawn uniformly between the minimum and maximum of the matching feature,
# so it spans the same range but carries no cluster structure.
# The seed makes the reference data, and every statistic computed from it,
# reproducible between runs.
set.seed(123)
random_df <- as.data.frame(
  lapply(bcw.features, function(x) runif(length(x), min(x), max(x)))
)

# PCA scatter of the real observations, coloured by the true class labels.
# NOTE(review): prcomp() is called with its defaults here, while the PCA
# section later in the script sets scale. explicitly - confirm this is intended.
fviz_pca_ind(prcomp(bcw.features), title = "PCA - Breast Cancer data", habillage = bcw.real.class.labels,  palette = "jco",
             geom = "point", ggtheme = theme_classic(),legend = "bottom") # Plot bcw data set

# Same view for the uniform random reference data.
fviz_pca_ind(prcomp(random_df), title = "PCA - Random data", geom = "point", ggtheme = theme_classic()) # Plot the random df

# Hopkins statistic of the real data; n is set to one less than the row count.
res.bcw.real <- get_clust_tendency(bcw.features, n = nrow(bcw.features)-1, graph = FALSE)
res.bcw.real$hopkins_stat
## [1] 0.7111959
# Hopkins statistic of the random reference data, for comparison.
res.bcw.random <- get_clust_tendency(random_df, n = nrow(random_df)-1, graph = FALSE)
res.bcw.random$hopkins_stat
## [1] 0.4986158
## Selecting the Optimal Number of Cluster
# Naive approach, the 'elbow method': plot the within-cluster sum of squares
# (method = "wss") for k = 1..10 and look for a strong bend in the curve, the
# so-called "elbow" or "knee". The dashed vertical line marks k = 2.
# NOTE(review): no RNG seed is set before these calls; kmeans (random starts)
# and clara (sub-sampling) can give slightly different curves between runs.
fviz_nbclust(bcw.features, FUNcluster = kmeans, method="wss", k.max=10) + geom_vline(xintercept=2, linetype=2) #KMeans

fviz_nbclust(bcw.features, FUNcluster = cluster::pam, method="wss", k.max=10) + geom_vline(xintercept=2, linetype=2) #PAM

fviz_nbclust(bcw.features, FUNcluster = cluster::clara, method="wss", k.max=10) + geom_vline(xintercept=2, linetype=2) #CLARA

fviz_nbclust(bcw.features, FUNcluster = hcut, method="wss", k.max=10) + geom_vline(xintercept=2, linetype=2) # hierarchical clustering

# Second criterion for choosing K: the silhouette method, run for the same
# four clustering algorithms.
fviz_nbclust(bcw.features, FUNcluster = kmeans, method = "silhouette") #KMeans

fviz_nbclust(bcw.features, FUNcluster = cluster::pam, method = "silhouette") # PAM

fviz_nbclust(bcw.features, FUNcluster = cluster::clara, method = "silhouette") # CLARA

fviz_nbclust(bcw.features, FUNcluster = hcut, method = "silhouette") # hierarchical clustering

# Using the NbClust
library(NbClust)
NbClust.results.1 <- NbClust(bcw.features, distance="euclidean", min.nc=2, max.nc=10, method="complete", index="all")

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
## 

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 10 proposed 2 as the best number of clusters 
## * 2 proposed 3 as the best number of clusters 
## * 7 proposed 4 as the best number of clusters 
## * 1 proposed 5 as the best number of clusters 
## * 3 proposed 10 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  2 
##  
##  
## *******************************************************************
NbClust.results.1$All.index
##         KL       CH Hartigan     CCC    Scott      Marriot  TrCovW   TraceW
## 2  11.9005 717.2956  19.4655 16.7361 2217.346 1.349710e+30 5370507 23592.81
## 3   0.2747 378.0760 247.6635  3.0523 2459.171 2.131352e+30 5087813 22937.18
## 4  22.0334 425.7778  43.6937 10.4944 3219.390 1.244910e+30 3498839 16813.52
## 5   0.3649 350.2892  53.8250  9.9600 3579.914 1.147398e+30 3039867 15796.98
## 6   1.4747 312.7813  41.3879 11.2960 3914.334 1.012584e+30 2748377 14635.13
## 7   1.2650 283.0651  34.6282 11.7409 4292.956 7.917215e+29 2372785 13791.97
## 8   0.7710 259.6181  36.8175 12.0621 4538.338 7.219831e+29 2094970 13119.90
## 9   4.1627 243.7969  20.5218 12.8003 4855.276 5.745171e+29 1824134 12441.29
## 10  1.4853 225.2522  11.3643 13.5573 5039.868 5.413058e+29 1692788 12073.68
##    Friedman  Rubin Cindex     DB Silhouette   Duda Pseudot2  Beale Ratkowsky
## 2   59.3419 4.7661 0.3069 0.9217     0.5516 0.9366  11.1766 0.4043    0.4840
## 3   60.5916 4.9023 0.3112 1.3884     0.5187 0.5381 441.1776 5.1442    0.4095
## 4   67.3357 6.6878 0.2299 1.6997     0.4882 0.8899  19.4300 0.7385    0.3926
## 5   70.8713 7.1181 0.2241 2.1068     0.4804 0.7521  28.6715 1.9565    0.3567
## 6   73.7998 7.6832 0.2342 1.8653     0.4855 0.7609  21.3667 1.8595    0.3328
## 7   79.0534 8.1529 0.2329 1.7560     0.4854 0.7930  17.7512 1.5449    0.3117
## 8   82.3080 8.5706 0.2335 1.7606     0.4577 0.7257  21.5471 2.2308    0.2936
## 9   88.0672 9.0380 0.2458 1.7441     0.4610 0.5837  12.1247 4.0449    0.2786
## 10  90.4255 9.3132 0.2475 1.6303     0.4439 0.5531   4.8480 4.1589    0.2660
##         Ball Ptbiserial   Frey McClain   Dunn Hubert SDindex Dindex   SDbw
## 2  11796.407     0.7681 0.5460  0.2213 0.1638      0  0.3723 4.9496 0.6338
## 3   7645.728     0.7727 0.6667  0.2246 0.1679      0  0.4700 4.8947 0.7078
## 4   4203.379     0.8407 0.5600  0.3034 0.1701      0  0.5420 4.1351 0.6477
## 5   3159.396     0.8515 0.4360  0.3135 0.1639      0  0.6668 4.0377 0.6518
## 6   2439.188     0.8557 0.4960  0.3148 0.1765      0  0.5972 3.9161 0.6226
## 7   1970.281     0.8579 0.4395  0.3158 0.1790      0  0.5956 3.8208 0.5794
## 8   1639.987     0.8601 0.6144  0.3162 0.1826      0  0.5877 3.7485 0.5768
## 9   1382.366     0.8610 0.3082  0.3173 0.1949      0  0.5738 3.6622 0.5385
## 10  1207.368     0.8612 0.2006  0.3173 0.1965      0  0.5449 3.6204 0.5034
NbClust.results.1$Best.nc
##                      KL       CH Hartigan     CCC    Scott      Marriot  TrCovW
## Number_clusters  4.0000   2.0000    3.000  2.0000   4.0000 4.000000e+00       4
## Value_Index     22.0334 717.2956  228.198 16.7361 760.2184 7.889303e+29 1588974
##                   TraceW Friedman   Rubin Cindex     DB Silhouette   Duda
## Number_clusters    4.000   4.0000  4.0000 5.0000 2.0000     2.0000 2.0000
## Value_Index     5107.132   6.7441 -1.3551 0.2241 0.9217     0.5516 0.9366
##                 PseudoT2  Beale Ratkowsky     Ball PtBiserial Frey McClain
## Number_clusters   2.0000 2.0000     2.000    3.000    10.0000    1  2.0000
## Value_Index      11.1766 0.4043     0.484 4150.679     0.8612   NA  0.2213
##                    Dunn Hubert SDindex Dindex    SDbw
## Number_clusters 10.0000      0  2.0000      0 10.0000
## Value_Index      0.1965      0  0.3723      0  0.5034
NbClust.results.1$Best.partition
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
##   1   1   1   2   1   2   1   1   1   1   1   1   1   1   1   1   1   1   2   1 
##  21  22  23  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  42 
##   1   2   1   1   1   1   1   1   1   1   1   2   1   1   1   2   1   2   1   1 
##  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62 
##   2   1   2   1   2   1   1   2   1   1   2   2   2   1   2   1   1   1   1   1 
##  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82 
##   2   1   1   1   1   2   2   1   1   2   1   1   1   1   1   1   1   1   1   1 
##  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102 
##   1   1   2   2   1   2   1   1   1   1   1   1   1   1   1   1   2   2   2   1 
## 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 
##   1   1   2   1   2   2   1   2   1   1   1   2   1   1   1   2   1   1   1   1 
## 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 141 142 143 
##   2   1   2   1   1   1   2   1   1   1   2   1   1   1   1   1   1   1   1   1 
## 144 145 147 148 149 150 151 152 153 154 155 156 157 158 160 161 162 163 164 166 
##   1   1   1   1   1   2   1   1   2   1   1   1   1   1   2   2   1   1   1   1 
## 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 
##   2   2   1   1   1   1   1   2   2   2   1   2   1   1   1   1   1   2   2   1 
## 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 
##   2   2   2   1   2   2   1   1   1   1   2   1   1   1   2   2   1   1   1   2 
## 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 
##   2   1   1   1   2   2   1   2   2   2   1   1   2   1   1   2   1   2   2   1 
## 227 228 229 230 231 232 233 234 235 237 238 239 240 241 242 243 244 245 246 247 
##   2   2   1   2   2   2   1   1   1   2   2   2   1   1   1   1   1   1   1   2 
## 248 249 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 
##   1   1   1   1   1   2   2   1   1   1   1   1   2   2   2   1   1   1   2   1 
## 269 270 271 272 273 274 275 277 278 279 280 281 282 283 284 285 286 287 288 289 
##   2   1   2   1   1   1   1   1   1   1   1   1   1   2   1   2   2   2   1   1 
## 290 291 292 294 296 297 299 300 301 302 303 304 305 306 307 308 309 310 311 312 
##   2   1   1   1   2   1   1   1   2   1   2   1   1   2   1   1   2   1   1   1 
## 313 314 315 317 318 319 320 321 323 324 325 326 327 328 329 330 331 332 333 334 
##   1   1   1   1   2   1   1   1   1   1   1   1   1   1   2   2   1   1   1   1 
## 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 
##   1   1   1   1   1   1   1   1   1   1   2   1   1   1   1   1   1   1   1   2 
## 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 
##   1   1   1   2   2   1   2   2   1   1   1   1   2   2   1   1   1   1   1   1 
## 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 
##   1   1   1   1   1   1   1   1   1   1   1   1   2   1   1   1   1   2   1   1 
## 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 413 414 415 
##   1   1   1   1   1   1   2   1   1   1   1   1   1   1   1   1   1   2   1   2 
## 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 
##   1   2   1   1   1   1   2   1   1   1   2   1   2   1   1   1   1   1   1   2 
## 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 
##   2   2   1   1   1   1   1   1   1   1   1   1   1   1   2   1   1   1   2   1 
## 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 
##   1   1   2   1   1   1   1   1   1   1   2   2   2   1   1   1   1   1   1   1 
## 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 
##   1   1   1   1   2   1   1   2   2   1   1   1   2   1   1   1   2   1   2   1 
## 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 
##   1   1   1   1   1   1   1   1   1   1   1   2   1   1   1   1   1   1   1   2 
## 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 
##   2   1   1   1   2   1   1   1   1   1   1   1   1   1   1   2   1   1   1   1 
## 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 
##   1   1   1   1   1   1   1   1   1   1   1   2   1   1   2   1   1   1   1   1 
## 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 
##   1   1   1   1   1   1   1   1   1   1   2   1   1   2   2   2   2   1   1   2 
## 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 
##   1   1   1   1   1   1   2   2   1   1   1   2   1   2   1   2   2   1   1   2 
## 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 
##   1   1   1   1   1   1   1   1   2   1   2   1   1   2   1   1   2   2   1   1 
## 616 617 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 
##   1   1   1   1   1   1   1   1   1   1   2   1   1   1   1   1   1   2   1   1 
## 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 
##   2   1   1   1   1   1   1   1   1   1   1   1   2   1   1   1   1   1   1   1 
## 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 
##   1   1   2   1   1   1   1   1   1   1   1   1   2   2   2   1   1   1   1   1 
## 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 
##   1   1   1   1   2   2   1   1   1   1   1   1   1   1   1   2   1   1   1   1 
## 697 698 699 
##   2   2   2
factoextra::fviz_nbclust(NbClust.results.1) + theme_minimal() + ggtitle("Optimal number of clusters")
## Warning in if (class(best_nc) == "numeric") print(best_nc) else if
## (class(best_nc) == : the condition has length > 1 and only the first element
## will be used
## Warning in if (class(best_nc) == "matrix") .viz_NbClust(x, print.summary, : the
## condition has length > 1 and only the first element will be used
## Warning in if (class(best_nc) == "numeric") print(best_nc) else if
## (class(best_nc) == : the condition has length > 1 and only the first element
## will be used
## Warning in if (class(best_nc) == "matrix") {: the condition has length > 1 and
## only the first element will be used
## Among all indices: 
## ===================
## * 2 proposed  0 as the best number of clusters
## * 1 proposed  1 as the best number of clusters
## * 10 proposed  2 as the best number of clusters
## * 2 proposed  3 as the best number of clusters
## * 7 proposed  4 as the best number of clusters
## * 1 proposed  5 as the best number of clusters
## * 3 proposed  10 as the best number of clusters
## 
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is  2 .

## Internal Cluster Validation
library(clValid)
library(mclust)
## Package 'mclust' version 5.4.7
## Type 'citation("mclust")' for citing this R package in publications.
## 
## Attaching package: 'mclust'
## The following object is masked from 'package:purrr':
## 
##     map
# Internal and stability validation with clValid - currently DISABLED.
# The code is kept as real comments: wrapping it in a quoted string made R
# evaluate the string and echo the whole block back as output.
# NOTE(review): the bare `y` lines look like interactive answers typed at a
# clValid confirmation prompt - confirm. If so, check whether clValid's
# `maxitems` argument avoids the prompt before re-enabling this block.
#
# methods <- c("agnes","kmeans", "diana", "pam", "clara")
# K.range <- 2:5 # range for number of clusters
# internal.validation <- clValid(bcw.features, nClust=K.range, clMethods=methods, validation="internal")
# y
# summary(internal.validation)
# optimalScores(internal.validation)
# par(mfrow = c(2, 2))
# plot(internal.validation, legend = FALSE, lwd=2)
# plot.new()
# legend("center", clusterMethods(internal.validation), col=1:9, lty=1:9, pch=paste(1:9))
#
# stability.validation <- clValid(bcw.features, nClust=K.range, clMethods=methods, validation="stability")
# y
# summary(stability.validation)
# optimalScores(stability.validation)
# par(mfrow = c(2,2))
# plot(stability.validation, measure=c("APN","AD","ADM"), legend=FALSE, lwd=2)
# plot.new()
# legend("center", clusterMethods(stability.validation), col=1:9, lty=1:9, pch=paste(1:9))
## PCA
# PCA input: the nine feature columns.
bcw.pca <- bcw[, 2:10]
# Fit the PCA on centred, unit-variance features and keep the rotated scores
# (retx). TRUE is spelled out because T is an ordinary variable that can be
# reassigned, and the result is bound with the conventional left assignment.
bcw.after.pca <- prcomp(bcw.pca, retx = TRUE, center = TRUE, scale. = TRUE)

library(factoextra)
fviz_eig(bcw.after.pca) #The Knee Plot for PCA 

fviz_pca_var(bcw.after.pca, col.var = "contrib", # Color by contributions to the PC
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE     # Avoid text overlapping
)

# Results for Variables
res.bcw.pca <- get_pca_var(bcw.after.pca)
res.bcw.pca$coord          # Coordinates
##                                  Dim.1       Dim.2         Dim.3        Dim.4
## clump_thickness             -0.7336763 -0.12402816  0.6362101794 -0.073103185
## uniformity_cell_size        -0.9249037 -0.04108445 -0.0146410842  0.138476643
## uniformity_cell_shape       -0.9171059 -0.07260418  0.0246083043  0.119229545
## marginal_adhesion           -0.8081485 -0.04588881 -0.3030226066 -0.334350435
## single_epithelial_cell_size -0.8166750  0.14482028 -0.0644326696  0.289748243
## bare_nuclei                 -0.8138416 -0.23013886  0.0005077785 -0.338041989
## bland_chromatin             -0.8397818 -0.20090791 -0.1564667443 -0.008845551
## normal_nucleoli             -0.8151140  0.02991976 -0.0985836639  0.282785536
## mitoses                     -0.5591457  0.79768592  0.0591084558 -0.175582935
##                                   Dim.5       Dim.6        Dim.7       Dim.8
## clump_thickness              0.04953131 -0.13324709 -0.004620507  0.12648504
## uniformity_cell_size        -0.08981904 -0.07638857 -0.111466349 -0.22278472
## uniformity_cell_shape       -0.06684128 -0.04094765 -0.069022299 -0.29755368
## marginal_adhesion           -0.01206751 -0.35967455  0.067189001  0.08345318
## single_epithelial_cell_size -0.39262650  0.03808059  0.114496139  0.23420699
## bare_nuclei                 -0.07694312  0.33472577  0.218549434 -0.06467187
## bland_chromatin              0.14039350  0.16422401 -0.380038688  0.19593569
## normal_nucleoli              0.42562850  0.01182280  0.249473012  0.03779217
## mitoses                      0.06477553  0.08150570 -0.071685215 -0.02733719
##                                     Dim.9
## clump_thickness             -0.0008167946
## uniformity_cell_size        -0.2179785924
## uniformity_cell_shape        0.1984374718
## marginal_adhesion            0.0136811963
## single_epithelial_cell_size  0.0198861241
## bare_nuclei                 -0.0227459864
## bland_chromatin              0.0185038372
## normal_nucleoli             -0.0065638440
## mitoses                      0.0022285395
res.bcw.pca[["contrib"]] # Contribution of each variable to each PC (percent)
##                                 Dim.1      Dim.2        Dim.3      Dim.4
## clump_thickness              9.124180  1.9824789 7.506012e+01  1.1626972
## uniformity_cell_size        14.500329  0.2175319 3.975159e-02  4.1720268
## uniformity_cell_shape       14.256857  0.6793463 1.122978e-01  3.0928711
## marginal_adhesion           11.070498  0.2713824 1.702778e+01 24.3219182
## single_epithelial_cell_size 11.305333  2.7028802 7.698751e-01 18.2656722
## bare_nuclei                 11.227024  6.8257112 4.781418e-05 24.8619583
## bland_chromatin             11.954124  5.2019008 4.539961e+00  0.0170233
## normal_nucleoli             11.262157  0.1153677 1.802262e+00 17.3983643
## mitoses                      5.299499 82.0034006 6.478989e-01  6.7074686
##                                   Dim.5       Dim.6        Dim.7      Dim.8
## clump_thickness              0.64515022  5.88147462  0.007251659  6.1358900
## uniformity_cell_size         2.12147586  1.93298082  4.220323512 19.0357528
## uniformity_cell_shape        1.17487272  0.55542925  1.618218004 33.9570458
## marginal_adhesion            0.03829449 42.85388274  1.533396807  2.6710683
## single_epithelial_cell_size 40.53782907  0.48037256  4.452868511 21.0377340
## bare_nuclei                  1.55682870 37.11496662 16.223986031  1.6040952
## bland_chromatin              5.18316779  8.93396163 49.058448484 14.7240179
## normal_nucleoli             47.63900579  0.04630327 21.140016973  0.5477757
## mitoses                      1.10337536  2.20062849  1.745490019  0.2866203
##                                    Dim.9
## clump_thickness             7.548417e-04
## uniformity_cell_size        5.375983e+01
## uniformity_cell_shape       4.455306e+01
## marginal_adhesion           2.117768e-01
## single_epithelial_cell_size 4.474355e-01
## bare_nuclei                 5.853825e-01
## bland_chromatin             3.873948e-01
## normal_nucleoli             4.874686e-02
## mitoses                     5.619153e-03
res.bcw.pca[["cos2"]] # Quality of representation (squared coordinates)
##                                 Dim.1        Dim.2        Dim.3        Dim.4
## clump_thickness             0.5382809 0.0153829836 4.047634e-01 5.344076e-03
## uniformity_cell_size        0.8554468 0.0016879317 2.143613e-04 1.917578e-02
## uniformity_cell_shape       0.8410832 0.0052713666 6.055686e-04 1.421568e-02
## marginal_adhesion           0.6531039 0.0021057833 9.182270e-02 1.117902e-01
## single_epithelial_cell_size 0.6669580 0.0209729147 4.151569e-03 8.395404e-02
## bare_nuclei                 0.6623382 0.0529638934 2.578390e-07 1.142724e-01
## bland_chromatin             0.7052335 0.0403639873 2.448184e-02 7.824378e-05
## normal_nucleoli             0.6644109 0.0008951922 9.718739e-03 7.996766e-02
## mitoses                     0.3126439 0.6363028325 3.493810e-03 3.082937e-02
##                                    Dim.5        Dim.6        Dim.7        Dim.8
## clump_thickness             0.0024533504 0.0177547869 2.134908e-05 0.0159984655
## uniformity_cell_size        0.0080674599 0.0058352139 1.242475e-02 0.0496330332
## uniformity_cell_shape       0.0044677570 0.0016767101 4.764078e-03 0.0885381946
## marginal_adhesion           0.0001456247 0.1293657803 4.514362e-03 0.0069644328
## single_epithelial_cell_size 0.1541555654 0.0014501316 1.310937e-02 0.0548529161
## bare_nuclei                 0.0059202432 0.1120413440 4.776386e-02 0.0041824513
## bland_chromatin             0.0197103343 0.0269695263 1.444294e-01 0.0383907944
## normal_nucleoli             0.1811596241 0.0001397787 6.223678e-02 0.0014282478
## mitoses                     0.0041958698 0.0066431792 5.138770e-03 0.0007473219
##                                    Dim.9
## clump_thickness             6.671534e-07
## uniformity_cell_size        4.751467e-02
## uniformity_cell_shape       3.937743e-02
## marginal_adhesion           1.871751e-04
## single_epithelial_cell_size 3.954579e-04
## bare_nuclei                 5.173799e-04
## bland_chromatin             3.423920e-04
## normal_nucleoli             4.308405e-05
## mitoses                     4.966388e-06
library(corrplot)
## corrplot 0.84 loaded
# Plot the cos2 matrix; is.corr = FALSE because it is not a correlation matrix.
corrplot(res.bcw.pca[["cos2"]], is.corr = FALSE)

# Contributions of variables, drawn in this order:
# PC1 and PC2 together, PC1 alone, PC2 alone.
invisible(lapply(list(1:2, 1, 2), function(pc.axes) {
  print(fviz_contrib(bcw.after.pca, choice = "var", axes = pc.axes, top = 9))
}))

# Print the rotation matrix of the fitted PCA (one column per component)
print("Principal components:")
## [1] "Principal components:"
print(bcw.after.pca[["rotation"]])
##                                    PC1         PC2          PC3         PC4
## clump_thickness             -0.3020626 -0.14080053  0.866372452 -0.10782844
## uniformity_cell_size        -0.3807930 -0.04664031 -0.019937801  0.20425540
## uniformity_cell_shape       -0.3775825 -0.08242247  0.033510871  0.17586560
## marginal_adhesion           -0.3327236 -0.05209438 -0.412647341 -0.49317257
## single_epithelial_cell_size -0.3362340  0.16440439 -0.087742529  0.42738358
## bare_nuclei                 -0.3350675 -0.26126062  0.000691478 -0.49861767
## bland_chromatin             -0.3457474 -0.22807676 -0.213071845 -0.01304734
## normal_nucleoli             -0.3355914  0.03396582 -0.134248356  0.41711347
## mitoses                     -0.2302064  0.90555729  0.080492170 -0.25898781
##                                     PC5         PC6          PC7         PC8
## clump_thickness              0.08032124 -0.24251752 -0.008515668  0.24770729
## uniformity_cell_size        -0.14565287 -0.13903168 -0.205434260 -0.43629981
## uniformity_cell_shape       -0.10839155 -0.07452713 -0.127209198 -0.58272674
## marginal_adhesion           -0.01956898 -0.65462877  0.123830400  0.16343403
## single_epithelial_cell_size -0.63669325  0.06930891  0.211018210  0.45866910
## bare_nuclei                 -0.12477294  0.60922054  0.402790095 -0.12665288
## bland_chromatin              0.22766572  0.29889733 -0.700417365  0.38371888
## normal_nucleoli              0.69021015  0.02151820  0.459782742  0.07401187
## mitoses                      0.10504168  0.14834515 -0.132116994 -0.05353693
##                                      PC9
## clump_thickness             -0.002747438
## uniformity_cell_size        -0.733210938
## uniformity_cell_shape        0.667480798
## marginal_adhesion            0.046019211
## single_epithelial_cell_size  0.066890623
## bare_nuclei                 -0.076510293
## bland_chromatin              0.062241047
## normal_nucleoli             -0.022078692
## mitoses                      0.007496101
summary(bcw.after.pca) # importance of components: standard deviation, proportion of variance and cumulative proportion per PC
## Importance of components:
##                           PC1     PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.4289 0.88088 0.73434 0.67796 0.61667 0.54943 0.54259
## Proportion of Variance 0.6555 0.08622 0.05992 0.05107 0.04225 0.03354 0.03271
## Cumulative Proportion  0.6555 0.74172 0.80163 0.85270 0.89496 0.92850 0.96121
##                            PC8     PC9
## Standard deviation     0.51062 0.29729
## Proportion of Variance 0.02897 0.00982
## Cumulative Proportion  0.99018 1.00000
# We analyse the amount of variance explained by subsequent principal components.
# prcomp stores component standard deviations in $sdev, so sdev^2 are the
# variances and their normalised values are the explained shares.
variance <- bcw.after.pca$sdev^2 / sum(bcw.after.pca$sdev^2)
# Accumulate the unrounded shares and round afterwards; summing already
# rounded values would let the rounding error build up across components.
cumulative.variance <- round(cumsum(variance), 4)
variance <- round(variance, 4)
# One label per component, derived from the data instead of a fixed list.
pca.df.var <- data.frame(PCs = paste0("Dim_", seq_along(variance)),
                         Variance = variance,
                         Cummulative_Variance = cumulative.variance)
ggplot(pca.df.var, aes(x = PCs, y = Variance, fill = PCs)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Variance), vjust = -0.3, size = 3.5) # Variance explained by each PC

ggplot(pca.df.var, aes(x = PCs, y = Cummulative_Variance, fill = PCs)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Cummulative_Variance), vjust = -0.3, size = 3.5) # Cumulative variance explained

# Keep the scores on the first three principal components as the reduced
# feature set used for clustering.
# No column names are supplied, so data.frame() derives them from the argument
# expressions (bcw.after.pca.x...1., ...2., ...3.). The 3D plot of the PCA-based
# k-means result reads the columns by exactly those names, so changing how the
# arguments are written here changes the names it depends on.
pca.features<-data.frame(bcw.after.pca$x[,1], bcw.after.pca$x[,2], bcw.after.pca$x[,3])
# NOTE(review): this renames the columns of bcw1, not pca.features. It looks
# like a typo for pca.features, but code further down still uses the
# auto-generated names, so confirm what bcw1 is and update those uses before
# changing it.
colnames(bcw1)<-c("PC1","PC2","PC3")
## K-MEANS
k <- 2 # Partition into K clusters

### FOR ALL FEATURES
# Use k rather than a literal 2 so this run and the PCA-based run below are
# driven by the same setting. nstart = 10 tries ten random initialisations.
# NOTE(review): no set.seed() is visible in this section, so which cluster is
# numbered 1 or 2 may differ between runs; confirm whether that matters.
kmeans.k2.10x <- kmeans(bcw.features, centers = k, iter.max = 10, nstart = 10)
bcw.kmeans.labels <- kmeans.k2.10x$cluster
plot(bcw.features, col = bcw.kmeans.labels)
title("K-Means Clustering for Breast Cancer Problem (10 random initialization)")

bcw.sil.kmeans <- silhouette(bcw.kmeans.labels, dist(bcw.features))
fviz_silhouette(bcw.sil.kmeans, xlab = "K-means") # silhouette information
##   cluster size ave.sil.width
## 1       1  453          0.76
## 2       2  230          0.28

# Visualization of cluster analysis results (3D scatterplot).
# All three coordinates now come from bcw.features, the data that was actually
# clustered, instead of mixing in a column taken from bcw.
plot3d(bcw.features$uniformity_cell_size, bcw.features$uniformity_cell_shape,
       bcw.features$clump_thickness, col = bcw.kmeans.labels,
       pch = as.numeric(bcw.real.class.labels), size = 1, type = "s",
       xlab = "uniformity_cell_size", ylab = "uniformity_cell_shape",
       zlab = "clump thickness")
# NOTE(review): pch and col are per-observation vectors while the legend has
# two entries, so the label-to-colour mapping presumably follows the first
# observations only; verify that "malignant"/"benign" match the cluster colours.
legend3d("topright", legend = c("malignant", "benign"),
         pch = as.numeric(bcw.real.class.labels),
         col = bcw.kmeans.labels, cex = 1, inset = c(0.02))


### FOR SELECTED PCA FEATURES
# Same k-means setup as for the full feature set, applied to the retained
# principal-component scores.
kmeans.k2.10xpca <- kmeans(pca.features, centers = k, iter.max = 10, nstart = 10)
pca.kmeans.labels <- kmeans.k2.10xpca$cluster
plot(pca.features, col = pca.kmeans.labels)
title("K-Means Clustering for Breast Cancer Problem PCA (10 random initialization)")

pca.sil.kmeans <- silhouette(pca.kmeans.labels, dist(pca.features))
fviz_silhouette(pca.sil.kmeans, xlab = "K-means") # silhouette information
##   cluster size ave.sil.width
## 1       1  232          0.41
## 2       2  451          0.80

# 3D scatterplot of the three retained components. Columns are read by
# position so the plot does not depend on the auto-generated column names
# (bcw.after.pca.x...1. etc.) and keeps working if the columns are renamed.
plot3d(pca.features[[1]], pca.features[[2]], pca.features[[3]],
       col = pca.kmeans.labels,
       pch = as.numeric(bcw.real.class.labels), size = 1, type = "s",
       xlab = "Dim_1", ylab = "Dim_2", zlab = "Dim_3")
# NOTE(review): pch and col are per-observation vectors while the legend has
# two entries; verify that "malignant"/"benign" match the cluster colours.
legend3d("topright", legend = c("malignant", "benign"),
         pch = as.numeric(bcw.real.class.labels),
         col = pca.kmeans.labels, cex = 1, inset = c(0.02))
## PAM
library(cluster)

### Application of PAM algorithm FOR ALL FEATURES
bcw.pam2 <- pam(x = bcw.features, k = 2)
# Open a fresh graphics device in a platform-independent way. X11() needs an
# X display and fails where none is available; noRStudioGD = TRUE still asks
# for a separate device instead of the RStudio plot pane.
dev.new(noRStudioGD = TRUE)
plot(bcw.pam2) # default visualization (note: plot() works differently for quantitative and mixed data types)

# Medoids, clustering vector, objective function and silhouette information
summary(bcw.pam2)
## Medoids:
##      ID clump_thickness uniformity_cell_size uniformity_cell_shape
## 23   23               3                    1                     1
## 468 453               6                    6                     6
##     marginal_adhesion single_epithelial_cell_size bare_nuclei bland_chromatin
## 23                  1                           2           1               2
## 468                 5                           4          10               7
##     normal_nucleoli mitoses
## 23                1       1
## 468               6       2
## Clustering vector:
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
##   1   2   1   2   1   2   1   1   1   1   1   1   1   1   2   1   1   1   2   1 
##  21  22  23  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  42 
##   2   2   1   1   2   1   1   1   1   1   1   2   1   1   1   2   1   2   2   2 
##  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62 
##   2   1   2   1   2   1   1   2   1   1   2   2   2   2   2   1   2   1   2   1 
##  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82 
##   2   1   1   1   1   2   2   1   1   2   1   2   1   1   1   1   1   1   1   1 
##  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102 
##   1   1   2   2   2   2   1   1   1   1   1   1   1   1   1   1   2   2   2   1 
## 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 
##   1   1   2   1   2   2   1   2   1   2   2   2   1   1   1   2   1   1   1   1 
## 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 141 142 143 
##   2   2   2   1   2   1   2   1   1   1   2   1   1   1   1   1   1   1   1   2 
## 144 145 147 148 149 150 151 152 153 154 155 156 157 158 160 161 162 163 164 166 
##   1   1   2   1   1   2   1   2   2   1   1   2   1   1   2   2   1   1   1   1 
## 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 
##   2   2   1   1   1   1   1   2   2   2   1   2   1   2   1   1   1   2   2   1 
## 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 
##   2   2   2   1   2   2   1   1   1   1   2   1   1   1   2   2   1   1   1   2 
## 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 
##   2   1   1   1   2   2   1   2   2   2   1   1   2   1   1   2   1   2   2   1 
## 227 228 229 230 231 232 233 234 235 237 238 239 240 241 242 243 244 245 246 247 
##   2   2   1   2   2   2   1   2   1   2   2   2   2   1   1   1   1   1   1   2 
## 248 249 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 
##   2   1   1   2   2   2   2   2   1   1   1   2   2   2   2   2   2   1   2   2 
## 269 270 271 272 273 274 275 277 278 279 280 281 282 283 284 285 286 287 288 289 
##   2   1   2   1   2   1   1   1   1   1   2   1   1   2   2   2   2   2   1   2 
## 290 291 292 294 296 297 299 300 301 302 303 304 305 306 307 308 309 310 311 312 
##   2   1   1   2   2   2   1   2   2   1   2   1   2   2   1   1   2   1   1   1 
## 313 314 315 317 318 319 320 321 323 324 325 326 327 328 329 330 331 332 333 334 
##   2   1   1   2   2   1   2   2   1   2   1   1   2   1   2   2   2   1   1   2 
## 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 
##   2   1   2   1   1   2   2   1   1   1   2   1   1   1   1   2   1   1   2   2 
## 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 
##   1   1   1   2   2   2   2   2   1   1   1   1   2   2   1   1   1   1   1   1 
## 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 
##   1   1   1   1   1   1   1   2   1   1   1   1   2   1   1   1   1   2   1   1 
## 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 413 414 415 
##   1   1   1   1   1   1   2   1   1   1   1   1   1   1   1   1   1   2   1   2 
## 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 
##   1   2   1   1   1   1   2   1   1   1   2   1   2   1   1   1   1   1   1   2 
## 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 
##   2   2   1   1   1   2   1   1   1   1   1   1   1   1   2   1   1   1   2   1 
## 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 
##   1   2   2   1   1   1   1   1   1   1   2   2   2   1   1   1   1   1   1   1 
## 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 
##   1   1   1   1   2   1   1   2   2   1   1   1   2   2   1   1   2   1   2   1 
## 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 
##   1   1   1   1   1   1   1   1   1   1   1   2   1   1   1   1   1   1   1   2 
## 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 
##   2   1   1   1   2   1   1   2   2   1   1   1   1   1   1   2   1   1   1   1 
## 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 
##   1   1   1   1   1   1   1   1   1   1   1   2   1   1   2   1   1   1   1   1 
## 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 
##   1   1   1   1   1   1   1   1   1   1   2   1   1   2   2   2   2   1   1   2 
## 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 
##   1   1   1   1   1   1   2   2   1   1   1   2   1   2   1   2   2   2   1   2 
## 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 
##   1   1   1   1   1   1   1   1   2   2   2   1   1   2   1   2   2   2   1   1 
## 616 617 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 
##   1   1   1   1   1   1   1   1   1   1   2   1   1   1   1   1   1   2   1   1 
## 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 
##   2   1   1   1   1   1   1   1   1   1   1   1   2   1   1   1   1   1   1   1 
## 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 
##   1   1   2   1   1   1   1   1   1   1   1   1   2   2   2   1   1   1   1   1 
## 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 
##   1   1   1   1   2   2   1   1   1   1   1   1   1   1   1   2   1   1   1   1 
## 697 698 699 
##   2   2   2 
## Objective function:
##    build     swap 
## 4.949396 4.522860 
## 
## Numerical information per cluster:
##      size max_diss  av_diss diameter separation
## [1,]  454 12.24745 2.613916 15.55635   3.605551
## [2,]  229 15.84298 8.307403 23.19483   3.605551
## 
## Isolated clusters:
##  L-clusters: character(0)
##  L*-clusters: character(0)
## 
## Silhouette plot information:
##     cluster neighbor     sil_width
## 23        1        2  0.8390551922
## 200       1        2  0.8390551922
## 258       1        2  0.8390551922
## 277       1        2  0.8390551922
## 393       1        2  0.8390551922
## 396       1        2  0.8390551922
## 525       1        2  0.8390551922
## 534       1        2  0.8390551922
## 551       1        2  0.8390551922
## 564       1        2  0.8390551922
## 573       1        2  0.8390551922
## 599       1        2  0.8390551922
## 601       1        2  0.8390551922
## 617       1        2  0.8390551922
## 621       1        2  0.8390551922
## 642       1        2  0.8390551922
## 643       1        2  0.8390551922
## 646       1        2  0.8390551922
## 650       1        2  0.8390551922
## 656       1        2  0.8390551922
## 12        1        2  0.8344997928
## 29        1        2  0.8344997928
## 36        1        2  0.8344997928
## 145       1        2  0.8344997928
## 366       1        2  0.8344997928
## 430       1        2  0.8344997928
## 535       1        2  0.8344997928
## 559       1        2  0.8344997928
## 138       1        2  0.8285402572
## 141       1        2  0.8285402572
## 171       1        2  0.8285402572
## 257       1        2  0.8285402572
## 402       1        2  0.8285402572
## 425       1        2  0.8285402572
## 476       1        2  0.8285402572
## 542       1        2  0.8285402572
## 555       1        2  0.8285402572
## 584       1        2  0.8285402572
## 635       1        2  0.8285402572
## 693       1        2  0.8285402572
## 35        1        2  0.8274131361
## 375       1        2  0.8274131361
## 410       1        2  0.8274131361
## 31        1        2  0.8263562573
## 471       1        2  0.8263562573
## 487       1        2  0.8263562573
## 496       1        2  0.8263562573
## 514       1        2  0.8263562573
## 142       1        2  0.8251395634
## 343       1        2  0.8251395634
## 384       1        2  0.8251395634
## 385       1        2  0.8251395634
## 446       1        2  0.8251395634
## 510       1        2  0.8251395634
## 629       1        2  0.8251395634
## 645       1        2  0.8251395634
## 680       1        2  0.8251395634
## 696       1        2  0.8251395634
## 17        1        2  0.8240760882
## 137       1        2  0.8240760882
## 499       1        2  0.8240760882
## 500       1        2  0.8240760882
## 502       1        2  0.8240760882
## 539       1        2  0.8240760882
## 544       1        2  0.8240760882
## 603       1        2  0.8240760882
## 619       1        2  0.8240760882
## 654       1        2  0.8240760882
## 134       1        2  0.8239000236
## 694       1        2  0.8237987256
## 128       1        2  0.8236350612
## 163       1        2  0.8236350612
## 169       1        2  0.8236350612
## 195       1        2  0.8236350612
## 259       1        2  0.8236350612
## 281       1        2  0.8236350612
## 323       1        2  0.8236350612
## 397       1        2  0.8236350612
## 655       1        2  0.8236350612
## 668       1        2  0.8236350612
## 615       1        2  0.8228477812
## 135       1        2  0.8218503436
## 288       1        2  0.8218503436
## 32        1        2  0.8202696893
## 95        1        2  0.8202696893
## 132       1        2  0.8202696893
## 158       1        2  0.8202696893
## 177       1        2  0.8202696893
## 243       1        2  0.8202696893
## 352       1        2  0.8202696893
## 365       1        2  0.8202696893
## 673       1        2  0.8202696893
## 389       1        2  0.8200982348
## 80        1        2  0.8182128922
## 311       1        2  0.8182128922
## 92        1        2  0.8178668469
## 526       1        2  0.8178668469
## 549       1        2  0.8173836949
## 398       1        2  0.8148621037
## 438       1        2  0.8148621037
## 465       1        2  0.8148621037
## 469       1        2  0.8148621037
## 474       1        2  0.8148621037
## 478       1        2  0.8148621037
## 527       1        2  0.8148621037
## 630       1        2  0.8148621037
## 639       1        2  0.8148621037
## 689       1        2  0.8148621037
## 548       1        2  0.8145142250
## 48        1        2  0.8143620951
## 65        1        2  0.8143620951
## 94        1        2  0.8143620951
## 126       1        2  0.8143620951
## 173       1        2  0.8143620951
## 217       1        2  0.8143620951
## 226       1        2  0.8143620951
## 278       1        2  0.8143620951
## 328       1        2  0.8143620951
## 355       1        2  0.8143620951
## 377       1        2  0.8143620951
## 406       1        2  0.8143620951
## 408       1        2  0.8143620951
## 411       1        2  0.8143620951
## 418       1        2  0.8143620951
## 429       1        2  0.8143620951
## 574       1        2  0.8143620951
## 578       1        2  0.8143620951
## 579       1        2  0.8143620951
## 661       1        2  0.8143620951
## 675       1        2  0.8143620951
## 97        1        2  0.8142818875
## 27        1        2  0.8142136063
## 139       1        2  0.8138025307
## 373       1        2  0.8138025307
## 597       1        2  0.8138025307
## 567       1        2  0.8130306090
## 82        1        2  0.8129721906
## 503       1        2  0.8129721906
## 493       1        2  0.8124619951
## 530       1        2  0.8124619951
## 399       1        2  0.8116969049
## 10        1        2  0.8107293301
## 275       1        2  0.8099987251
## 186       1        2  0.8097913000
## 18        1        2  0.8095918698
## 67        1        2  0.8095918698
## 89        1        2  0.8095918698
## 93        1        2  0.8095918698
## 179       1        2  0.8095918698
## 196       1        2  0.8095918698
## 504       1        2  0.8095918698
## 528       1        2  0.8095918698
## 662       1        2  0.8095918698
## 8         1        2  0.8095046807
## 3         1        2  0.8091069187
## 34        1        2  0.8090805311
## 282       1        2  0.8090805311
## 519       1        2  0.8087325773
## 90        1        2  0.8081013813
## 155       1        2  0.8067158642
## 182       1        2  0.8067158642
## 199       1        2  0.8067158642
## 291       1        2  0.8067158642
## 312       1        2  0.8067158642
## 314       1        2  0.8067158642
## 336       1        2  0.8067158642
## 344       1        2  0.8067158642
## 346       1        2  0.8067158642
## 376       1        2  0.8067158642
## 381       1        2  0.8067158642
## 447       1        2  0.8067158642
## 491       1        2  0.8067158642
## 505       1        2  0.8067158642
## 511       1        2  0.8067158642
## 586       1        2  0.8067158642
## 608       1        2  0.8067158642
## 624       1        2  0.8067158642
## 633       1        2  0.8067158642
## 644       1        2  0.8067158642
## 660       1        2  0.8067158642
## 666       1        2  0.8067158642
## 679       1        2  0.8067158642
## 684       1        2  0.8067158642
## 685       1        2  0.8067158642
## 686       1        2  0.8067158642
## 687       1        2  0.8067158642
## 477       1        2  0.8051785275
## 607       1        2  0.8047710643
## 315       1        2  0.8038160459
## 339       1        2  0.8038160459
## 378       1        2  0.8038160459
## 518       1        2  0.8038160459
## 602       1        2  0.8038160459
## 470       1        2  0.8028755589
## 677       1        2  0.8028755589
## 498       1        2  0.8025373111
## 391       1        2  0.8022140868
## 407       1        2  0.8018007327
## 532       1        2  0.8018007327
## 464       1        2  0.8017561534
## 25        1        2  0.8012602883
## 91        1        2  0.8012602883
## 96        1        2  0.8012602883
## 172       1        2  0.8012602883
## 194       1        2  0.8012602883
## 203       1        2  0.8012602883
## 205       1        2  0.8012602883
## 213       1        2  0.8012602883
## 218       1        2  0.8012602883
## 245       1        2  0.8012602883
## 270       1        2  0.8012602883
## 279       1        2  0.8012602883
## 292       1        2  0.8012602883
## 302       1        2  0.8012602883
## 304       1        2  0.8012602883
## 307       1        2  0.8012602883
## 308       1        2  0.8012602883
## 325       1        2  0.8012602883
## 338       1        2  0.8012602883
## 342       1        2  0.8012602883
## 552       1        2  0.8012602883
## 563       1        2  0.8012602883
## 580       1        2  0.8012602883
## 695       1        2  0.8006947673
## 672       1        2  0.8006220476
## 46        1        2  0.8005757604
## 522       1        2  0.8002172325
## 103       1        2  0.7995998037
## 62        1        2  0.7993979217
## 120       1        2  0.7971523593
## 394       1        2  0.7966688410
## 449       1        2  0.7966688410
## 497       1        2  0.7966688410
## 517       1        2  0.7966688410
## 162       1        2  0.7965159875
## 565       1        2  0.7965159875
## 356       1        2  0.7958115096
## 665       1        2  0.7942891848
## 28        1        2  0.7925144388
## 193       1        2  0.7925144388
## 512       1        2  0.7925144388
## 546       1        2  0.7925144388
## 560       1        2  0.7925144388
## 577       1        2  0.7925144388
## 596       1        2  0.7925144388
## 620       1        2  0.7925144388
## 632       1        2  0.7925144388
## 657       1        2  0.7925144388
## 444       1        2  0.7924074874
## 11        1        2  0.7918241360
## 151       1        2  0.7918241360
## 181       1        2  0.7918241360
## 208       1        2  0.7918241360
## 209       1        2  0.7918241360
## 229       1        2  0.7918241360
## 533       1        2  0.7918241360
## 521       1        2  0.7915341710
## 157       1        2  0.7911682285
## 221       1        2  0.7895519142
## 688       1        2  0.7888511148
## 453       1        2  0.7879362316
## 170       1        2  0.7875074527
## 70        1        2  0.7873915660
## 614       1        2  0.7869217227
## 148       1        2  0.7857736636
## 440       1        2  0.7851289323
## 448       1        2  0.7851289323
## 452       1        2  0.7851289323
## 475       1        2  0.7851289323
## 509       1        2  0.7851289323
## 513       1        2  0.7851289323
## 590       1        2  0.7851289323
## 610       1        2  0.7851289323
## 678       1        2  0.7851289323
## 251       1        2  0.7847652452
## 647       1        2  0.7847652452
## 506       1        2  0.7835867687
## 122       1        2  0.7833660160
## 581       1        2  0.7830482226
## 166       1        2  0.7828997532
## 545       1        2  0.7828376538
## 616       1        2  0.7826406141
## 374       1        2  0.7823768565
## 121       1        2  0.7803012321
## 433       1        2  0.7797233575
## 588       1        2  0.7797233575
## 653       1        2  0.7797233575
## 383       1        2  0.7796288556
## 1         1        2  0.7790344139
## 98        1        2  0.7790344139
## 204       1        2  0.7790344139
## 272       1        2  0.7790344139
## 537       1        2  0.7790344139
## 561       1        2  0.7790344139
## 562       1        2  0.7790344139
## 541       1        2  0.7780535845
## 481       1        2  0.7765085201
## 459       1        2  0.7763799360
## 485       1        2  0.7763799360
## 594       1        2  0.7763799360
## 641       1        2  0.7760597639
## 242       1        2  0.7745577575
## 451       1        2  0.7745533514
## 79        1        2  0.7743175694
## 625       1        2  0.7741984806
## 663       1        2  0.7739261368
## 664       1        2  0.7739261368
## 568       1        2  0.7736998033
## 479       1        2  0.7735884123
## 210       1        2  0.7704750832
## 414       1        2  0.7699108502
## 538       1        2  0.7699108502
## 576       1        2  0.7699108502
## 326       1        2  0.7696471565
## 434       1        2  0.7694800715
## 455       1        2  0.7694264675
## 5         1        2  0.7691699155
## 49        1        2  0.7691699155
## 30        1        2  0.7686860780
## 369       1        2  0.7686860780
## 372       1        2  0.7686860780
## 109       1        2  0.7685202756
## 648       1        2  0.7680359259
## 691       1        2  0.7680359259
## 154       1        2  0.7679355301
## 332       1        2  0.7676196798
## 370       1        2  0.7674839654
## 83        1        2  0.7669857062
## 683       1        2  0.7666924047
## 395       1        2  0.7636108715
## 652       1        2  0.7628229470
## 400       1        2  0.7587684182
## 431       1        2  0.7587322264
## 348       1        2  0.7580490391
## 371       1        2  0.7566993282
## 246       1        2  0.7556003593
## 557       1        2  0.7553287460
## 14        1        2  0.7552176230
## 71        1        2  0.7550090852
## 131       1        2  0.7550090852
## 424       1        2  0.7550090852
## 536       1        2  0.7534069633
## 390       1        2  0.7531617435
## 190       1        2  0.7517601838
## 333       1        2  0.7505139978
## 351       1        2  0.7499628244
## 461       1        2  0.7499628244
## 558       1        2  0.7499628244
## 640       1        2  0.7499628244
## 460       1        2  0.7498495085
## 405       1        2  0.7480845163
## 553       1        2  0.7477065579
## 667       1        2  0.7472194854
## 540       1        2  0.7469751555
## 636       1        2  0.7461065101
## 543       1        2  0.7448085055
## 409       1        2  0.7445651252
## 439       1        2  0.7424203592
## 598       1        2  0.7423000292
## 78        1        2  0.7408485883
## 76        1        2  0.7368557424
## 473       1        2  0.7366477327
## 20        1        2  0.7351750538
## 183       1        2  0.7351750538
## 501       1        2  0.7351750538
## 404       1        2  0.7351651439
## 676       1        2  0.7348945654
## 77        1        2  0.7336494643
## 443       1        2  0.7302904634
## 379       1        2  0.7287042809
## 486       1        2  0.7261700122
## 674       1        2  0.7255749391
## 119       1        2  0.7255404813
## 363       1        2  0.7244947371
## 115       1        2  0.7244796509
## 136       1        2  0.7232359719
## 508       1        2  0.7210223294
## 421       1        2  0.7171831028
## 319       1        2  0.7163152958
## 403       1        2  0.7160726252
## 651       1        2  0.7157875924
## 419       1        2  0.7125825401
## 463       1        2  0.7114787845
## 472       1        2  0.7114787845
## 386       1        2  0.7110380623
## 347       1        2  0.7051602240
## 420       1        2  0.7045659600
## 529       1        2  0.7028462927
## 198       1        2  0.7022021781
## 220       1        2  0.7021271296
## 631       1        2  0.7007505904
## 9         1        2  0.6996110270
## 600       1        2  0.6970828208
## 432       1        2  0.6917445746
## 626       1        2  0.6915947007
## 266       1        2  0.6904042115
## 423       1        2  0.6903662693
## 628       1        2  0.6825087532
## 388       1        2  0.6821673387
## 462       1        2  0.6816306285
## 638       1        2  0.6807388546
## 116       1        2  0.6727188095
## 144       1        2  0.6727188095
## 482       1        2  0.6679658281
## 241       1        2  0.6666348748
## 623       1        2  0.6568482211
## 223       1        2  0.6373510759
## 111       1        2  0.6316945641
## 380       1        2  0.6288998893
## 84        1        2  0.6241824633
## 81        1        2  0.6192492761
## 249       1        2  0.6135338346
## 310       1        2  0.6122257742
## 244       1        2  0.6088993130
## 442       1        2  0.6088420068
## 364       1        2  0.6075058340
## 585       1        2  0.6069547155
## 445       1        2  0.6052732905
## 427       1        2  0.5789697884
## 554       1        2  0.5766496142
## 299       1        2  0.5743399582
## 357       1        2  0.5724670145
## 164       1        2  0.5699115088
## 235       1        2  0.5697604958
## 38        1        2  0.5676024711
## 690       1        2  0.5628224207
## 490       1        2  0.5612663453
## 73        1        2  0.5526639090
## 117       1        2  0.5340430997
## 13        1        2  0.5067838408
## 130       1        2  0.4976031877
## 622       1        2  0.4922247894
## 274       1        2  0.4630476891
## 52        1        2  0.4499555767
## 556       1        2  0.4483924203
## 416       1        2  0.4159922560
## 102       1        2  0.3919721453
## 7         1        2  0.3742831460
## 456       1        2  0.3546045557
## 58        1        2  0.3400371816
## 104       1        2  0.3211844361
## 149       1        2  0.3067586017
## 233       1        2  0.3003040397
## 106       1        2  0.2908166855
## 60        1        2  0.2900669210
## 495       1        2  0.2549651525
## 658       1        2  0.2516824905
## 16        1        2  0.2265575425
## 349       1        2  0.2176979031
## 64        1        2  0.2163982233
## 66        1        2  0.1896334874
## 51        1        2  0.1742587228
## 75        1        2  0.1732895190
## 44        1        2  0.1315833529
## 6         2        1  0.4960761035
## 247       2        1  0.4958519160
## 214       2        1  0.4871184751
## 286       2        1  0.4851962770
## 201       2        1  0.4844903702
## 191       2        1  0.4794606276
## 587       2        1  0.4773440370
## 211       2        1  0.4754998299
## 263       2        1  0.4744907891
## 184       2        1  0.4728016369
## 212       2        1  0.4717710413
## 206       2        1  0.4708111578
## 484       2        1  0.4706205926
## 318       2        1  0.4697905984
## 450       2        1  0.4680337564
## 368       2        1  0.4670068624
## 426       2        1  0.4636678153
## 547       2        1  0.4616694650
## 232       2        1  0.4594325679
## 367       2        1  0.4589742642
## 681       2        1  0.4585138828
## 174       2        1  0.4549243491
## 160       2        1  0.4508051863
## 215       2        1  0.4504349240
## 494       2        1  0.4488277682
## 566       2        1  0.4464735260
## 161       2        1  0.4436891955
## 262       2        1  0.4422296346
## 230       2        1  0.4411727500
## 100       2        1  0.4407048896
## 392       2        1  0.4401748296
## 457       2        1  0.4390277126
## 572       2        1  0.4385866627
## 682       2        1  0.4378322053
## 582       2        1  0.4327401252
## 515       2        1  0.4325899231
## 150       2        1  0.4310016295
## 303       2        1  0.4294055955
## 613       2        1  0.4272292149
## 176       2        1  0.4268738930
## 227       2        1  0.4222060708
## 422       2        1  0.4208325245
## 192       2        1  0.4201021501
## 133       2        1  0.4195561402
## 466       2        1  0.4171467904
## 361       2        1  0.4155515529
## 153       2        1  0.4153600799
## 15        2        1  0.4149794259
## 583       2        1  0.4145942615
## 358       2        1  0.4103599290
## 488       2        1  0.4100411963
## 468       2        1  0.4096791116
## 290       2        1  0.4094617364
## 225       2        1  0.4081392720
## 606       2        1  0.4080698552
## 287       2        1  0.4055278571
## 167       2        1  0.4044009339
## 670       2        1  0.4043012062
## 480       2        1  0.4032472843
## 345       2        1  0.3968773016
## 216       2        1  0.3964967624
## 507       2        1  0.3947557653
## 239       2        1  0.3942934915
## 382       2        1  0.3930134002
## 189       2        1  0.3917837591
## 659       2        1  0.3914682243
## 219       2        1  0.3914516037
## 413       2        1  0.3878176325
## 354       2        1  0.3873380195
## 296       2        1  0.3865788274
## 483       2        1  0.3839103413
## 417       2        1  0.3837896076
## 43        2        1  0.3818288312
## 254       2        1  0.3818288312
## 74        2        1  0.3797396095
## 467       2        1  0.3794464252
## 516       2        1  0.3793906332
## 454       2        1  0.3777801683
## 123       2        1  0.3777629924
## 228       2        1  0.3775055560
## 188       2        1  0.3772836058
## 222       2        1  0.3771826149
## 88        2        1  0.3758750634
## 609       2        1  0.3746904970
## 570       2        1  0.3738336542
## 458       2        1  0.3716934430
## 85        2        1  0.3715200275
## 202       2        1  0.3655947737
## 306       2        1  0.3614087383
## 671       2        1  0.3580627537
## 125       2        1  0.3537949980
## 237       2        1  0.3526395552
## 264       2        1  0.3487057651
## 401       2        1  0.3484729450
## 114       2        1  0.3480297122
## 571       2        1  0.3440854257
## 37        2        1  0.3426006764
## 107       2        1  0.3421786259
## 22        2        1  0.3420583297
## 108       2        1  0.3406937852
## 637       2        1  0.3390415474
## 524       2        1  0.3359352440
## 105       2        1  0.3306408672
## 127       2        1  0.3282526836
## 697       2        1  0.3251777881
## 69        2        1  0.3249541032
## 309       2        1  0.3246506916
## 19        2        1  0.3241334721
## 261       2        1  0.3225371319
## 267       2        1  0.3223442294
## 45        2        1  0.3194861481
## 50        2        1  0.3173393269
## 649       2        1  0.3158703281
## 627       2        1  0.3120531692
## 207       2        1  0.3096030163
## 33        2        1  0.3083532169
## 118       2        1  0.3070423386
## 110       2        1  0.3067425617
## 415       2        1  0.3064437961
## 592       2        1  0.3047770748
## 285       2        1  0.3042060148
## 301       2        1  0.3010835141
## 47        2        1  0.3001594879
## 63        2        1  0.2997635006
## 255       2        1  0.2997635006
## 187       2        1  0.2985333101
## 550       2        1  0.2979963631
## 531       2        1  0.2950003497
## 359       2        1  0.2942822990
## 612       2        1  0.2933310500
## 238       2        1  0.2924991060
## 492       2        1  0.2922749378
## 197       2        1  0.2914233556
## 57        2        1  0.2891969522
## 435       2        1  0.2879614698
## 54        2        1  0.2849553401
## 335       2        1  0.2794149533
## 55        2        1  0.2750758164
## 337       2        1  0.2744386384
## 185       2        1  0.2723100234
## 252       2        1  0.2715845796
## 269       2        1  0.2698819951
## 231       2        1  0.2672291110
## 436       2        1  0.2664600791
## 692       2        1  0.2654748873
## 699       2        1  0.2641902996
## 330       2        1  0.2625774494
## 441       2        1  0.2604459474
## 99        2        1  0.2600101335
## 520       2        1  0.2540254246
## 39        2        1  0.2535881552
## 271       2        1  0.2521274013
## 113       2        1  0.2481645580
## 362       2        1  0.2460810054
## 178       2        1  0.2456281664
## 575       2        1  0.2382929076
## 611       2        1  0.2378529187
## 321       2        1  0.2378296289
## 21        2        1  0.2318342039
## 224       2        1  0.2306488709
## 256       2        1  0.2247062299
## 175       2        1  0.2241981279
## 340       2        1  0.2194158266
## 265       2        1  0.2187400022
## 300       2        1  0.2185593231
## 595       2        1  0.2160482242
## 334       2        1  0.2149052387
## 72        2        1  0.2117769835
## 86        2        1  0.2108784254
## 234       2        1  0.2088469958
## 387       2        1  0.2001733981
## 589       2        1  0.1966227119
## 634       2        1  0.1913866937
## 698       2        1  0.1910561256
## 305       2        1  0.1868249986
## 129       2        1  0.1860810986
## 428       2        1  0.1851823984
## 329       2        1  0.1787405996
## 68        2        1  0.1771361253
## 437       2        1  0.1725212278
## 324       2        1  0.1665398577
## 283       2        1  0.1661609528
## 56        2        1  0.1660328368
## 168       2        1  0.1643868700
## 284       2        1  0.1587159591
## 341       2        1  0.1525545585
## 294       2        1  0.1523949001
## 101       2        1  0.1518242244
## 280       2        1  0.1508421486
## 53        2        1  0.1408862250
## 260       2        1  0.1404192016
## 604       2        1  0.1342964098
## 605       2        1  0.1301816839
## 240       2        1  0.1275360990
## 569       2        1  0.1254744904
## 317       2        1  0.1231040495
## 593       2        1  0.1117087938
## 2         2        1  0.1111314047
## 360       2        1  0.1082820016
## 331       2        1  0.1024349876
## 313       2        1  0.1023000709
## 253       2        1  0.0993707001
## 4         2        1  0.0948486101
## 156       2        1  0.0946531873
## 124       2        1  0.0920288429
## 112       2        1  0.0893270551
## 152       2        1  0.0881158561
## 591       2        1  0.0764686428
## 523       2        1  0.0710359246
## 669       2        1  0.0362279926
## 143       2        1  0.0356336431
## 350       2        1  0.0260346776
## 87        2        1  0.0052381442
## 40        2        1 -0.0008144982
## 61        2        1 -0.0323805276
## 268       2        1 -0.0547775967
## 273       2        1 -0.0547775967
## 248       2        1 -0.0617937638
## 327       2        1 -0.0707318488
## 180       2        1 -0.0874725524
## 489       2        1 -0.0921716413
## 320       2        1 -0.1062849245
## 289       2        1 -0.1171822935
## 59        2        1 -0.1227622604
## 297       2        1 -0.1514838984
## 147       2        1 -0.1520148546
## 42        2        1 -0.1619599324
## 353       2        1 -0.1889455512
## 26        2        1 -0.2035022665
## Average silhouette width per cluster:
## [1] 0.7550140 0.2828213
## Average silhouette width of total data set:
## [1] 0.5966946
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"
# Cluster membership of every observation, taken from the fitted PAM object.
bcw.cluster.labels <- bcw.pam2[["clustering"]]

# Silhouette widths of that partition, computed on the pairwise distances
# between the rows of bcw.features, followed by the silhouette plot.
bcw.sil.pam2 <- silhouette(x = bcw.cluster.labels, dist = dist(bcw.features))
fviz_silhouette(sil.obj = bcw.sil.pam2, xlab = "PAM")
##   cluster size ave.sil.width
## 1       1  454          0.76
## 2       2  229          0.28

# Visualization of cluster analysis results (3D scatterplot).
# Every point is coloured by its PAM cluster. All three coordinates are read
# from bcw.features, the data the cluster labels were computed on, so that
# x, y, z and the colour vector are guaranteed to have the same length and
# to be on the same scale.
# NOTE(review): pch is forwarded unchanged from the original call; with
# type = "s" (spheres) it may have no visible effect -- confirm.
plot3d(
  x = bcw.features$uniformity_cell_size,
  y = bcw.features$uniformity_cell_shape,
  z = bcw.features$clump_thickness,
  col = bcw.cluster.labels,
  pch = as.numeric(bcw.real.class.labels),
  size = 1, type = "s",
  xlab = "uniformity_cell_size",
  ylab = "uniformity_cell_shape",
  zlab = "clump thickness"
)

# One legend entry per cluster colour. The legend is keyed on the distinct
# cluster ids (not on per-observation vectors), so each colour used in the
# plot is explained exactly once. Labels are kept neutral ("cluster k")
# because the colours encode PAM clusters, not the true diagnosis.
bcw.cluster.ids <- sort(unique(bcw.cluster.labels))
legend3d("topright",
         legend = paste("cluster", bcw.cluster.ids),
         pch = 16, col = bcw.cluster.ids,
         cex = 1, inset = c(0.02))
#snapshot3d(filename = '3dplot.png', fmt = 'png')


### PAM clustering applied to the PCA features
# Partition the rows of pca.features (built earlier in the file) into two
# clusters around medoids.
pca.pam2 <- pam(pca.features, k = 2)
# X11()
# plot(pca.pam2) # default visualization (note: plot() works differently for quantitative and mixed data types)
# Print the full summary: medoids, clustering vector, objective function,
# per-cluster information and silhouette widths.
summary(pca.pam2)
## Medoids:
##      ID bcw.after.pca.x...1. bcw.after.pca.x...2. bcw.after.pca.x...3.
## 134 132             1.714471           0.09986549         -0.006010226
## 659 643            -3.115058          -0.50006703          0.011453841
## Clustering vector:
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
##   1   2   1   2   1   2   1   1   1   1   1   1   1   1   2   2   1   1   2   1 
##  21  22  23  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  42 
##   2   2   1   1   1   1   1   1   1   1   1   2   1   1   1   2   1   2   2   2 
##  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62 
##   2   2   2   1   2   1   1   2   2   1   2   2   2   2   2   1   1   1   2   1 
##  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82 
##   2   1   1   2   1   2   2   1   1   2   1   2   2   1   1   1   1   1   1   1 
##  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102 
##   1   1   2   2   2   2   1   1   1   1   1   1   1   1   1   1   2   2   2   1 
## 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 
##   1   1   2   1   2   2   1   2   1   2   2   2   1   1   1   2   1   1   1   1 
## 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 141 142 143 
##   2   2   2   1   2   1   2   1   1   1   2   1   1   1   1   1   1   1   1   2 
## 144 145 147 148 149 150 151 152 153 154 155 156 157 158 160 161 162 163 164 166 
##   1   1   2   1   1   2   1   2   2   1   1   2   1   1   2   2   1   1   1   1 
## 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 
##   2   2   1   1   1   1   1   2   2   2   1   2   1   2   1   1   1   2   2   1 
## 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 
##   2   2   2   1   2   2   1   1   1   1   2   1   1   1   2   2   1   1   1   2 
## 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 
##   2   1   1   1   2   2   1   2   2   2   1   1   2   1   1   2   1   2   2   1 
## 227 228 229 230 231 232 233 234 235 237 238 239 240 241 242 243 244 245 246 247 
##   2   2   1   2   2   2   1   2   1   2   2   2   2   1   1   1   1   1   1   2 
## 248 249 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 
##   1   1   1   2   2   2   2   2   1   1   1   2   2   2   2   2   2   1   2   2 
## 269 270 271 272 273 274 275 277 278 279 280 281 282 283 284 285 286 287 288 289 
##   2   1   2   1   2   1   1   1   1   1   2   1   1   2   2   2   2   2   1   2 
## 290 291 292 294 296 297 299 300 301 302 303 304 305 306 307 308 309 310 311 312 
##   2   1   1   2   2   2   1   2   2   1   2   1   2   2   1   1   2   1   1   1 
## 313 314 315 317 318 319 320 321 323 324 325 326 327 328 329 330 331 332 333 334 
##   2   1   1   2   2   1   2   2   1   2   1   1   1   1   2   2   2   1   1   2 
## 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 
##   2   1   2   1   1   2   2   1   1   1   2   1   1   1   2   2   1   1   2   2 
## 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 
##   1   1   1   2   2   2   2   2   1   1   1   1   2   2   1   1   1   1   1   1 
## 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 
##   1   1   1   1   1   1   1   2   1   1   1   1   2   1   1   1   1   2   1   1 
## 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 413 414 415 
##   1   1   1   1   1   1   2   1   1   1   1   1   1   1   1   1   1   2   1   2 
## 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 
##   1   2   1   1   1   1   2   1   1   1   2   1   2   1   1   1   1   1   1   2 
## 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 
##   2   2   1   1   1   2   1   1   1   1   1   1   1   1   2   1   1   1   2   1 
## 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 
##   1   2   2   1   1   1   1   1   1   1   2   2   2   1   1   1   1   1   1   1 
## 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 
##   1   1   1   1   2   1   1   2   2   1   1   1   2   2   1   1   2   1   2   1 
## 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 
##   1   1   1   1   1   1   1   1   1   1   1   2   1   1   1   1   1   1   1   2 
## 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 
##   2   1   1   1   2   1   1   2   2   1   1   1   1   1   1   2   1   1   1   1 
## 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 
##   1   1   1   1   1   1   1   1   1   1   1   2   1   1   2   1   1   1   1   1 
## 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 
##   1   1   1   1   1   1   1   1   1   1   2   1   1   2   2   2   2   1   1   2 
## 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 
##   1   1   1   1   1   1   2   2   1   1   1   2   1   2   1   2   2   2   1   2 
## 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 
##   1   1   1   1   1   1   1   1   2   2   2   1   1   2   1   2   2   2   1   1 
## 616 617 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 
##   1   1   1   1   1   1   1   1   1   1   2   1   1   1   1   1   1   2   1   1 
## 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 
##   2   1   1   1   1   1   1   1   1   1   1   1   2   1   1   1   1   1   1   1 
## 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 
##   1   2   2   1   1   1   1   1   1   1   1   1   2   2   2   1   1   1   1   1 
## 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 
##   1   1   1   1   2   2   1   1   1   1   1   1   1   1   1   2   1   1   1   1 
## 697 698 699 
##   2   2   2 
## Objective function:
##    build     swap 
## 1.199631 1.118681 
## 
## Numerical information per cluster:
##      size max_diss   av_diss diameter separation
## [1,]  451 3.871514 0.6667585 5.732047  0.1954325
## [2,]  232 5.759795 1.9972022 8.103404  0.1954325
## 
## Isolated clusters:
##  L-clusters: character(0)
##  L*-clusters: character(0)
## 
## Silhouette plot information:
##     cluster neighbor     sil_width
## 23        1        2  0.8700179613
## 200       1        2  0.8700179613
## 258       1        2  0.8700179613
## 277       1        2  0.8700179613
## 393       1        2  0.8700179613
## 396       1        2  0.8700179613
## 525       1        2  0.8700179613
## 534       1        2  0.8700179613
## 551       1        2  0.8700179613
## 564       1        2  0.8700179613
## 573       1        2  0.8700179613
## 599       1        2  0.8700179613
## 601       1        2  0.8700179613
## 617       1        2  0.8700179613
## 621       1        2  0.8700179613
## 642       1        2  0.8700179613
## 643       1        2  0.8700179613
## 646       1        2  0.8700179613
## 650       1        2  0.8700179613
## 656       1        2  0.8700179613
## 134       1        2  0.8695091890
## 519       1        2  0.8691071880
## 92        1        2  0.8687821986
## 526       1        2  0.8687821986
## 35        1        2  0.8685340229
## 375       1        2  0.8685340229
## 410       1        2  0.8685340229
## 128       1        2  0.8674225071
## 163       1        2  0.8674225071
## 169       1        2  0.8674225071
## 195       1        2  0.8674225071
## 259       1        2  0.8674225071
## 281       1        2  0.8674225071
## 323       1        2  0.8674225071
## 397       1        2  0.8674225071
## 655       1        2  0.8674225071
## 668       1        2  0.8674225071
## 453       1        2  0.8673245999
## 27        1        2  0.8666938637
## 135       1        2  0.8666882536
## 288       1        2  0.8666882536
## 506       1        2  0.8660268999
## 695       1        2  0.8657641411
## 688       1        2  0.8643841136
## 12        1        2  0.8643156960
## 29        1        2  0.8643156960
## 36        1        2  0.8643156960
## 145       1        2  0.8643156960
## 366       1        2  0.8643156960
## 430       1        2  0.8643156960
## 535       1        2  0.8643156960
## 559       1        2  0.8643156960
## 404       1        2  0.8640405720
## 399       1        2  0.8639400482
## 389       1        2  0.8633921237
## 641       1        2  0.8630522732
## 628       1        2  0.8627805615
## 31        1        2  0.8616963438
## 471       1        2  0.8616963438
## 487       1        2  0.8616963438
## 496       1        2  0.8616963438
## 514       1        2  0.8616963438
## 614       1        2  0.8614914652
## 451       1        2  0.8613736916
## 275       1        2  0.8611665683
## 80        1        2  0.8611259050
## 311       1        2  0.8611259050
## 32        1        2  0.8606262688
## 95        1        2  0.8606262688
## 132       1        2  0.8606262688
## 158       1        2  0.8606262688
## 177       1        2  0.8606262688
## 243       1        2  0.8606262688
## 352       1        2  0.8606262688
## 365       1        2  0.8606262688
## 673       1        2  0.8606262688
## 636       1        2  0.8603854181
## 138       1        2  0.8603472343
## 141       1        2  0.8603472343
## 171       1        2  0.8603472343
## 257       1        2  0.8603472343
## 402       1        2  0.8603472343
## 425       1        2  0.8603472343
## 476       1        2  0.8603472343
## 542       1        2  0.8603472343
## 555       1        2  0.8603472343
## 584       1        2  0.8603472343
## 635       1        2  0.8603472343
## 693       1        2  0.8603472343
## 3         1        2  0.8597295721
## 97        1        2  0.8596549595
## 567       1        2  0.8596435545
## 82        1        2  0.8595483469
## 503       1        2  0.8595483469
## 186       1        2  0.8584724310
## 607       1        2  0.8579559096
## 665       1        2  0.8575903159
## 455       1        2  0.8571805774
## 142       1        2  0.8571359501
## 343       1        2  0.8571359501
## 384       1        2  0.8571359501
## 385       1        2  0.8571359501
## 446       1        2  0.8571359501
## 510       1        2  0.8571359501
## 629       1        2  0.8571359501
## 645       1        2  0.8571359501
## 680       1        2  0.8571359501
## 696       1        2  0.8571359501
## 615       1        2  0.8571072345
## 462       1        2  0.8569943925
## 8         1        2  0.8559537371
## 17        1        2  0.8541967870
## 137       1        2  0.8541967870
## 499       1        2  0.8541967870
## 500       1        2  0.8541967870
## 502       1        2  0.8541967870
## 539       1        2  0.8541967870
## 544       1        2  0.8541967870
## 603       1        2  0.8541967870
## 619       1        2  0.8541967870
## 654       1        2  0.8541967870
## 18        1        2  0.8535261110
## 67        1        2  0.8535261110
## 89        1        2  0.8535261110
## 93        1        2  0.8535261110
## 179       1        2  0.8535261110
## 196       1        2  0.8535261110
## 504       1        2  0.8535261110
## 528       1        2  0.8535261110
## 662       1        2  0.8535261110
## 434       1        2  0.8523382499
## 508       1        2  0.8521699749
## 30        1        2  0.8521018546
## 369       1        2  0.8521018546
## 372       1        2  0.8521018546
## 400       1        2  0.8512756965
## 10        1        2  0.8510973572
## 251       1        2  0.8502669368
## 647       1        2  0.8502669368
## 116       1        2  0.8495744988
## 144       1        2  0.8495744988
## 139       1        2  0.8495140200
## 373       1        2  0.8495140200
## 597       1        2  0.8495140200
## 370       1        2  0.8494984586
## 464       1        2  0.8494303338
## 120       1        2  0.8492341859
## 242       1        2  0.8490284868
## 549       1        2  0.8489023528
## 79        1        2  0.8485840499
## 651       1        2  0.8485104505
## 162       1        2  0.8479871364
## 565       1        2  0.8479871364
## 90        1        2  0.8475690136
## 545       1        2  0.8466395445
## 493       1        2  0.8466173631
## 530       1        2  0.8466173631
## 498       1        2  0.8465852707
## 548       1        2  0.8464081015
## 522       1        2  0.8461948283
## 470       1        2  0.8458939651
## 677       1        2  0.8458939651
## 444       1        2  0.8456615040
## 663       1        2  0.8454528677
## 664       1        2  0.8454528677
## 356       1        2  0.8452935109
## 477       1        2  0.8450584370
## 154       1        2  0.8449903837
## 672       1        2  0.8448069175
## 62        1        2  0.8447467683
## 34        1        2  0.8445621986
## 282       1        2  0.8445621986
## 398       1        2  0.8441986945
## 438       1        2  0.8441986945
## 465       1        2  0.8441986945
## 469       1        2  0.8441986945
## 474       1        2  0.8441986945
## 478       1        2  0.8441986945
## 527       1        2  0.8441986945
## 630       1        2  0.8441986945
## 639       1        2  0.8441986945
## 689       1        2  0.8441986945
## 157       1        2  0.8436125027
## 568       1        2  0.8428824291
## 103       1        2  0.8423347404
## 48        1        2  0.8418108292
## 65        1        2  0.8418108292
## 94        1        2  0.8418108292
## 126       1        2  0.8418108292
## 173       1        2  0.8418108292
## 217       1        2  0.8418108292
## 226       1        2  0.8418108292
## 278       1        2  0.8418108292
## 328       1        2  0.8418108292
## 355       1        2  0.8418108292
## 377       1        2  0.8418108292
## 406       1        2  0.8418108292
## 408       1        2  0.8418108292
## 411       1        2  0.8418108292
## 418       1        2  0.8418108292
## 429       1        2  0.8418108292
## 574       1        2  0.8418108292
## 578       1        2  0.8418108292
## 579       1        2  0.8418108292
## 661       1        2  0.8418108292
## 675       1        2  0.8418108292
## 439       1        2  0.8415360355
## 407       1        2  0.8414583981
## 532       1        2  0.8414583981
## 326       1        2  0.8409555861
## 77        1        2  0.8405431593
## 348       1        2  0.8401012686
## 616       1        2  0.8397339851
## 395       1        2  0.8388279601
## 148       1        2  0.8381962962
## 351       1        2  0.8375160238
## 461       1        2  0.8375160238
## 558       1        2  0.8375160238
## 640       1        2  0.8375160238
## 155       1        2  0.8375061091
## 182       1        2  0.8375061091
## 199       1        2  0.8375061091
## 291       1        2  0.8375061091
## 312       1        2  0.8375061091
## 314       1        2  0.8375061091
## 336       1        2  0.8375061091
## 344       1        2  0.8375061091
## 346       1        2  0.8375061091
## 376       1        2  0.8375061091
## 381       1        2  0.8375061091
## 447       1        2  0.8375061091
## 491       1        2  0.8375061091
## 505       1        2  0.8375061091
## 511       1        2  0.8375061091
## 586       1        2  0.8375061091
## 608       1        2  0.8375061091
## 624       1        2  0.8375061091
## 633       1        2  0.8375061091
## 644       1        2  0.8375061091
## 660       1        2  0.8375061091
## 666       1        2  0.8375061091
## 679       1        2  0.8375061091
## 684       1        2  0.8375061091
## 685       1        2  0.8375061091
## 686       1        2  0.8375061091
## 687       1        2  0.8375061091
## 521       1        2  0.8374899427
## 5         1        2  0.8362348944
## 49        1        2  0.8362348944
## 25        1        2  0.8358485413
## 91        1        2  0.8358485413
## 96        1        2  0.8358485413
## 172       1        2  0.8358485413
## 194       1        2  0.8358485413
## 203       1        2  0.8358485413
## 205       1        2  0.8358485413
## 213       1        2  0.8358485413
## 218       1        2  0.8358485413
## 245       1        2  0.8358485413
## 270       1        2  0.8358485413
## 279       1        2  0.8358485413
## 292       1        2  0.8358485413
## 302       1        2  0.8358485413
## 304       1        2  0.8358485413
## 307       1        2  0.8358485413
## 308       1        2  0.8358485413
## 325       1        2  0.8358485413
## 338       1        2  0.8358485413
## 342       1        2  0.8358485413
## 552       1        2  0.8358485413
## 563       1        2  0.8358485413
## 580       1        2  0.8358485413
## 315       1        2  0.8355684892
## 339       1        2  0.8355684892
## 378       1        2  0.8355684892
## 518       1        2  0.8355684892
## 602       1        2  0.8355684892
## 166       1        2  0.8353921766
## 119       1        2  0.8346706316
## 11        1        2  0.8345351367
## 151       1        2  0.8345351367
## 181       1        2  0.8345351367
## 208       1        2  0.8345351367
## 209       1        2  0.8345351367
## 229       1        2  0.8345351367
## 533       1        2  0.8345351367
## 676       1        2  0.8332002280
## 109       1        2  0.8325462988
## 431       1        2  0.8312627153
## 626       1        2  0.8309867170
## 122       1        2  0.8306552048
## 374       1        2  0.8292699725
## 625       1        2  0.8290325748
## 170       1        2  0.8283847036
## 70        1        2  0.8277462109
## 391       1        2  0.8275951925
## 394       1        2  0.8275559259
## 449       1        2  0.8275559259
## 497       1        2  0.8275559259
## 517       1        2  0.8275559259
## 14        1        2  0.8261979172
## 694       1        2  0.8260490036
## 486       1        2  0.8242403048
## 433       1        2  0.8190504846
## 588       1        2  0.8190504846
## 653       1        2  0.8190504846
## 28        1        2  0.8189866610
## 193       1        2  0.8189866610
## 512       1        2  0.8189866610
## 546       1        2  0.8189866610
## 560       1        2  0.8189866610
## 577       1        2  0.8189866610
## 596       1        2  0.8189866610
## 620       1        2  0.8189866610
## 632       1        2  0.8189866610
## 657       1        2  0.8189866610
## 1         1        2  0.8187639671
## 98        1        2  0.8187639671
## 204       1        2  0.8187639671
## 272       1        2  0.8187639671
## 537       1        2  0.8187639671
## 561       1        2  0.8187639671
## 562       1        2  0.8187639671
## 648       1        2  0.8174654413
## 691       1        2  0.8174654413
## 210       1        2  0.8173075575
## 443       1        2  0.8171674135
## 190       1        2  0.8171247159
## 420       1        2  0.8168898477
## 683       1        2  0.8136517335
## 221       1        2  0.8132143718
## 541       1        2  0.8123728198
## 383       1        2  0.8120838008
## 557       1        2  0.8114160193
## 371       1        2  0.8107828321
## 581       1        2  0.8101576760
## 440       1        2  0.8099295643
## 448       1        2  0.8099295643
## 452       1        2  0.8099295643
## 475       1        2  0.8099295643
## 509       1        2  0.8099295643
## 513       1        2  0.8099295643
## 590       1        2  0.8099295643
## 610       1        2  0.8099295643
## 678       1        2  0.8099295643
## 479       1        2  0.8088207917
## 121       1        2  0.8084118960
## 83        1        2  0.8070658721
## 459       1        2  0.8062612101
## 485       1        2  0.8062612101
## 594       1        2  0.8062612101
## 405       1        2  0.8053445886
## 223       1        2  0.8048752066
## 246       1        2  0.8046738808
## 414       1        2  0.8043028508
## 538       1        2  0.8043028508
## 576       1        2  0.8043028508
## 543       1        2  0.8037193155
## 78        1        2  0.8032188775
## 46        1        2  0.8027219940
## 481       1        2  0.8018234326
## 363       1        2  0.8004427103
## 536       1        2  0.8000897917
## 652       1        2  0.7994131055
## 198       1        2  0.7993970376
## 390       1        2  0.7986300935
## 460       1        2  0.7983093294
## 379       1        2  0.7981151112
## 115       1        2  0.7972213475
## 71        1        2  0.7968320128
## 131       1        2  0.7968320128
## 424       1        2  0.7968320128
## 463       1        2  0.7918861238
## 472       1        2  0.7918861238
## 409       1        2  0.7908559266
## 249       1        2  0.7892470907
## 333       1        2  0.7885800561
## 319       1        2  0.7880646813
## 585       1        2  0.7880526794
## 136       1        2  0.7877721322
## 76        1        2  0.7874628385
## 332       1        2  0.7864117581
## 598       1        2  0.7856328637
## 482       1        2  0.7819411473
## 600       1        2  0.7817107492
## 553       1        2  0.7798631857
## 674       1        2  0.7788604263
## 347       1        2  0.7682689411
## 540       1        2  0.7663419252
## 20        1        2  0.7656629767
## 183       1        2  0.7656629767
## 501       1        2  0.7656629767
## 421       1        2  0.7655400206
## 403       1        2  0.7640010041
## 667       1        2  0.7625303838
## 445       1        2  0.7566601281
## 473       1        2  0.7564568767
## 638       1        2  0.7556068736
## 529       1        2  0.7522961123
## 310       1        2  0.7513819023
## 266       1        2  0.7491496351
## 423       1        2  0.7488702007
## 432       1        2  0.7460086541
## 84        1        2  0.7440794294
## 442       1        2  0.7424647956
## 244       1        2  0.7411638827
## 81        1        2  0.7384361978
## 241       1        2  0.7337808782
## 631       1        2  0.7296178630
## 220       1        2  0.7286078712
## 7         1        2  0.7186900700
## 419       1        2  0.7156621123
## 554       1        2  0.7101842819
## 427       1        2  0.7070416903
## 623       1        2  0.7056674138
## 388       1        2  0.7031681798
## 364       1        2  0.6960803376
## 235       1        2  0.6856260871
## 38        1        2  0.6792289177
## 130       1        2  0.6790874080
## 386       1        2  0.6675903962
## 111       1        2  0.6595356819
## 380       1        2  0.6562492450
## 556       1        2  0.6355712201
## 299       1        2  0.5970975794
## 73        1        2  0.5849775996
## 490       1        2  0.5832164256
## 9         1        2  0.5764595550
## 357       1        2  0.5153747247
## 117       1        2  0.5124054852
## 13        1        2  0.5089601493
## 102       1        2  0.5029617385
## 52        1        2  0.4849722813
## 274       1        2  0.4601146000
## 622       1        2  0.4512211463
## 164       1        2  0.4464002224
## 416       1        2  0.4319967536
## 456       1        2  0.3957842657
## 690       1        2  0.3738142671
## 495       1        2  0.3731851735
## 26        1        2  0.3180243896
## 149       1        2  0.2659170141
## 60        1        2  0.2097421035
## 104       1        2  0.1916239230
## 233       1        2  0.1865550700
## 64        1        2  0.1856150300
## 59        1        2  0.1795207504
## 248       1        2  0.1599340015
## 327       1        2  0.1516320647
## 58        1        2  0.1336629588
## 106       1        2  0.0845487305
## 43        2        1  0.5987976297
## 254       2        1  0.5987976297
## 37        2        1  0.5962998735
## 201       2        1  0.5949290659
## 417       2        1  0.5946926844
## 216       2        1  0.5946001883
## 232       2        1  0.5944551344
## 572       2        1  0.5943042542
## 458       2        1  0.5936584445
## 45        2        1  0.5904133905
## 697       2        1  0.5887598620
## 466       2        1  0.5884029821
## 413       2        1  0.5871720643
## 222       2        1  0.5864513724
## 637       2        1  0.5858514014
## 345       2        1  0.5856425032
## 219       2        1  0.5819725251
## 659       2        1  0.5819212411
## 74        2        1  0.5816219278
## 212       2        1  0.5816124846
## 153       2        1  0.5813879940
## 263       2        1  0.5787679799
## 570       2        1  0.5768854318
## 484       2        1  0.5766975713
## 176       2        1  0.5751354174
## 682       2        1  0.5743806419
## 100       2        1  0.5740754248
## 571       2        1  0.5724473926
## 247       2        1  0.5708742633
## 583       2        1  0.5684196967
## 15        2        1  0.5665718247
## 192       2        1  0.5664463508
## 225       2        1  0.5664409029
## 191       2        1  0.5656350595
## 457       2        1  0.5653768559
## 494       2        1  0.5651090760
## 488       2        1  0.5646286480
## 301       2        1  0.5642068742
## 202       2        1  0.5638656777
## 318       2        1  0.5635939657
## 422       2        1  0.5627643456
## 228       2        1  0.5611486500
## 133       2        1  0.5600782630
## 264       2        1  0.5599759635
## 515       2        1  0.5589784190
## 114       2        1  0.5587007822
## 670       2        1  0.5578858032
## 261       2        1  0.5575466818
## 382       2        1  0.5570942793
## 606       2        1  0.5553121617
## 6         2        1  0.5536401369
## 582       2        1  0.5505628979
## 167       2        1  0.5492655411
## 184       2        1  0.5468388191
## 161       2        1  0.5450981049
## 309       2        1  0.5442388166
## 368       2        1  0.5439628322
## 230       2        1  0.5426960260
## 450       2        1  0.5423952220
## 125       2        1  0.5417546434
## 468       2        1  0.5417483614
## 214       2        1  0.5416865177
## 547       2        1  0.5412183582
## 227       2        1  0.5409324350
## 22        2        1  0.5406775105
## 286       2        1  0.5404851872
## 392       2        1  0.5370104102
## 123       2        1  0.5363458076
## 612       2        1  0.5334757193
## 550       2        1  0.5322729890
## 178       2        1  0.5309842580
## 215       2        1  0.5309591321
## 63        2        1  0.5265893371
## 255       2        1  0.5265893371
## 359       2        1  0.5254209405
## 290       2        1  0.5245915385
## 262       2        1  0.5242723141
## 265       2        1  0.5242607601
## 589       2        1  0.5238117547
## 206       2        1  0.5234910463
## 211       2        1  0.5224602493
## 55        2        1  0.5222160866
## 609       2        1  0.5211363937
## 33        2        1  0.5210679463
## 587       2        1  0.5191617701
## 160       2        1  0.5168889764
## 296       2        1  0.5114483845
## 113       2        1  0.5107805876
## 107       2        1  0.5065360084
## 189       2        1  0.5062290974
## 426       2        1  0.5048903328
## 566       2        1  0.5035068954
## 441       2        1  0.5023578899
## 207       2        1  0.5023141087
## 467       2        1  0.5019828683
## 197       2        1  0.4986848475
## 516       2        1  0.4976340568
## 238       2        1  0.4970682807
## 524       2        1  0.4969274775
## 252       2        1  0.4967983943
## 313       2        1  0.4961071646
## 681       2        1  0.4960008821
## 575       2        1  0.4946486100
## 692       2        1  0.4937875818
## 285       2        1  0.4920981397
## 611       2        1  0.4911861355
## 306       2        1  0.4897163496
## 57        2        1  0.4872092768
## 480       2        1  0.4861595251
## 127       2        1  0.4842157582
## 627       2        1  0.4818009239
## 454       2        1  0.4815024344
## 88        2        1  0.4786305452
## 110       2        1  0.4746721681
## 150       2        1  0.4744501019
## 671       2        1  0.4726793918
## 699       2        1  0.4709653400
## 354       2        1  0.4706248162
## 19        2        1  0.4699301791
## 367       2        1  0.4684535714
## 50        2        1  0.4680005863
## 634       2        1  0.4679249077
## 507       2        1  0.4642472704
## 174       2        1  0.4638791419
## 435       2        1  0.4633501933
## 428       2        1  0.4618898643
## 358       2        1  0.4579246459
## 492       2        1  0.4521172430
## 300       2        1  0.4513353322
## 531       2        1  0.4496760533
## 329       2        1  0.4429948331
## 21        2        1  0.4415310741
## 267       2        1  0.4326393467
## 520       2        1  0.4316598543
## 105       2        1  0.4311583089
## 401       2        1  0.4264591489
## 362       2        1  0.4245070828
## 437       2        1  0.4216987155
## 69        2        1  0.4210891665
## 231       2        1  0.4201672628
## 604       2        1  0.4193797854
## 613       2        1  0.4185451409
## 698       2        1  0.4168146044
## 108       2        1  0.4167310618
## 85        2        1  0.4137006604
## 591       2        1  0.4123113938
## 101       2        1  0.4113971078
## 330       2        1  0.4084910536
## 54        2        1  0.4066440734
## 47        2        1  0.4054600572
## 188       2        1  0.4019267354
## 415       2        1  0.3972628708
## 303       2        1  0.3968049173
## 187       2        1  0.3928384051
## 68        2        1  0.3898083353
## 483       2        1  0.3892891893
## 185       2        1  0.3879610166
## 337       2        1  0.3872636623
## 361       2        1  0.3846439706
## 287       2        1  0.3840907246
## 239       2        1  0.3817356058
## 118       2        1  0.3784418282
## 269       2        1  0.3781720567
## 321       2        1  0.3732122804
## 592       2        1  0.3723667452
## 271       2        1  0.3676500518
## 335       2        1  0.3587238302
## 595       2        1  0.3585537492
## 436       2        1  0.3585359911
## 649       2        1  0.3572560635
## 224       2        1  0.3529320630
## 669       2        1  0.3447604805
## 237       2        1  0.3382847942
## 39        2        1  0.3376495914
## 605       2        1  0.3339627652
## 234       2        1  0.3216442924
## 175       2        1  0.3185091326
## 72        2        1  0.3070781630
## 53        2        1  0.3032924942
## 305       2        1  0.3016720793
## 168       2        1  0.2943736047
## 256       2        1  0.2939902550
## 56        2        1  0.2939698287
## 334       2        1  0.2937995265
## 387       2        1  0.2874585030
## 99        2        1  0.2858784541
## 86        2        1  0.2764852959
## 129       2        1  0.2696439131
## 124       2        1  0.2694985956
## 523       2        1  0.2643382111
## 340       2        1  0.2544588614
## 324       2        1  0.2534463530
## 260       2        1  0.2449623757
## 4         2        1  0.2444096130
## 2         2        1  0.2308609315
## 152       2        1  0.2238438310
## 143       2        1  0.2219817593
## 341       2        1  0.2145759351
## 280       2        1  0.2099558139
## 360       2        1  0.2018452122
## 283       2        1  0.1968699736
## 569       2        1  0.1927491234
## 44        2        1  0.1810042373
## 317       2        1  0.1778812304
## 240       2        1  0.1774251040
## 331       2        1  0.1658867289
## 284       2        1  0.1649062320
## 294       2        1  0.1584501710
## 112       2        1  0.1538365808
## 593       2        1  0.1480211115
## 40        2        1  0.1471546533
## 320       2        1  0.1454627122
## 253       2        1  0.1185896526
## 61        2        1  0.1131561632
## 350       2        1  0.1032242429
## 156       2        1  0.0827905599
## 87        2        1  0.0650241179
## 16        2        1  0.0315248145
## 42        2        1  0.0067823609
## 66        2        1  0.0009586036
## 353       2        1 -0.0002461887
## 489       2        1 -0.0053464846
## 75        2        1 -0.0769056348
## 297       2        1 -0.0770290314
## 51        2        1 -0.0821755494
## 289       2        1 -0.0901935806
## 268       2        1 -0.1032264982
## 273       2        1 -0.1032264982
## 658       2        1 -0.1152765773
## 349       2        1 -0.1245754866
## 180       2        1 -0.1565293712
## 147       2        1 -0.1614096874
## Average silhouette width per cluster:
## [1] 0.8024334 0.4101654
## Average silhouette width of total data set:
## [1] 0.6691886
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"
# Cluster membership assigned by the PAM fit on the PCA features
pca.cluster.labels <- pca.pam2[["clustering"]]

# Silhouette widths of that partition, computed from the distances between
# the rows of pca.features, followed by the silhouette plot
pca.sil.pam2 <- silhouette(
  x = pca.cluster.labels,
  dist = dist(pca.features)
)
fviz_silhouette(sil.obj = pca.sil.pam2, xlab = "PAM") #silhouette information
##   cluster size ave.sil.width
## 1       1  451          0.80
## 2       2  232          0.41

# Visualization of cluster analysis results (3D scatterplot)
# The first three PCA dimensions are plotted; each sphere is coloured by the
# PAM cluster of its observation.
# NOTE(review): pch is kept from the original call, but with type = "s" the
# observations are drawn as spheres, so the real class is presumably not
# visible in the plot -- confirm.
plot3d(pca.features$bcw.after.pca.x...1.,
       pca.features$bcw.after.pca.x...2.,
       pca.features$bcw.after.pca.x...3.,
       col = pca.cluster.labels,
       pch = as.numeric(bcw.real.class.labels), size = 1, type = "s",
       xlab = "Dim_1", ylab = "Dim_2", zlab = "Dim_3")
# The legend needs exactly one colour and one symbol per entry. The
# per-observation vectors passed before did not correspond to the two entries,
# and the colours encode cluster membership rather than the real class, so the
# entries are built from the distinct cluster ids.
legend3d("topright",
         legend = paste("cluster", sort(unique(pca.cluster.labels))),
         pch = 16, col = sort(unique(pca.cluster.labels)),
         cex = 1, inset = c(0.02))
#snapshot3d(filename = '3dplot.png', fmt = 'png')
## CLARA
# CLARA with two clusters on the complete feature set, using 200 sub-samples
# and the PAM-like swap phase (pamLike = TRUE); the fit is printed afterwards
bcw.clara <- clara(
  x = bcw.features,
  k = 2,
  samples = 200,
  pamLike = TRUE
)
print(bcw.clara)
## Call:     clara(x = bcw.features, k = 2, samples = 200, pamLike = TRUE) 
## Medoids:
##     clump_thickness uniformity_cell_size uniformity_cell_shape
## 646               3                    1                     1
## 468               6                    6                     6
##     marginal_adhesion single_epithelial_cell_size bare_nuclei bland_chromatin
## 646                 1                           2           1               2
## 468                 5                           4          10               7
##     normal_nucleoli mitoses
## 646               1       1
## 468               6       2
## Objective function:   4.52286
## Clustering vector:    Named int [1:683] 1 2 1 2 1 2 1 1 1 1 1 1 1 1 2 1 1 1 ...
##  - attr(*, "names")= chr [1:683] "1" "2" "3" "4" "5" "6" "7" ...
## Cluster sizes:            454 229 
## Best sample:
##  [1] 16  19  30  45  98  114 121 132 134 136 142 145 200 201 217 241 326 338 345
## [20] 346 352 396 397 405 416 451 468 476 477 500 505 514 540 542 543 576 582 611
## [39] 646 676 682 685 687 697
## 
## Available components:
##  [1] "sample"     "medoids"    "i.med"      "clustering" "objective" 
##  [6] "clusinfo"   "diss"       "call"       "silinfo"    "data"
# Cluster membership from the CLARA fit on all features. The component is
# called "clustering" (see the component list printed with the fit); spelling
# it out avoids relying on partial matching of `$cluster`.
bcw.clara.clust <- bcw.clara$clustering

# Silhouette widths of the CLARA partition on the full data set, then the plot
bcw.sil.clara <- silhouette(bcw.clara.clust, dist(bcw.features))
fviz_silhouette(bcw.sil.clara, xlab = "CLARA")
##   cluster size ave.sil.width
## 1       1  454          0.76
## 2       2  229          0.28

# Visualization of cluster analysis results (3D scatterplot)
# All three axes are read from bcw.features (the data that was clustered) so
# that the coordinates and the cluster labels refer to the same rows; each
# sphere is coloured by its CLARA cluster.
# NOTE(review): pch is kept from the original call, but with type = "s" the
# observations are drawn as spheres, so the real class is presumably not
# visible in the plot -- confirm.
plot3d(bcw.features$uniformity_cell_size,
       bcw.features$uniformity_cell_shape,
       bcw.features$clump_thickness,
       col = bcw.clara.clust,
       pch = as.numeric(bcw.real.class.labels), size = 1, type = "s",
       xlab = "uniformity_cell_size", ylab = "uniformity_cell_shape",
       zlab = "clump thickness")
# The legend needs exactly one colour and one symbol per entry. The
# per-observation vectors passed before did not correspond to the two entries,
# and the colours encode cluster membership rather than the real class, so the
# entries are built from the distinct cluster ids.
legend3d("topright",
         legend = paste("cluster", sort(unique(bcw.clara.clust))),
         pch = 16, col = sort(unique(bcw.clara.clust)),
         cex = 1, inset = c(0.02))
#snapshot3d(filename = '3dplot.png', fmt = 'png')


# CLARA with two clusters on the PCA features, using 200 sub-samples and the
# PAM-like swap phase (pamLike = TRUE); the fit is printed afterwards
pca.clara <- clara(
  x = pca.features,
  k = 2,
  samples = 200,
  pamLike = TRUE
)
print(pca.clara)
## Call:     clara(x = pca.features, k = 2, samples = 200, pamLike = TRUE) 
## Medoids:
##     bcw.after.pca.x...1. bcw.after.pca.x...2. bcw.after.pca.x...3.
## 134             1.714471           0.09986549         -0.006010226
## 659            -3.115058          -0.50006703          0.011453841
## Objective function:   1.118681
## Clustering vector:    Named int [1:683] 1 2 1 2 1 2 1 1 1 1 1 1 1 1 2 2 1 1 ...
##  - attr(*, "names")= chr [1:683] "1" "2" "3" "4" "5" "6" "7" ...
## Cluster sizes:            451 232 
## Best sample:
##  [1] 18  20  23  39  40  112 113 119 134 150 157 161 166 190 191 205 230 235 271
## [20] 302 321 325 327 377 395 397 418 439 474 485 504 507 509 512 516 539 575 586
## [39] 623 639 659 681 694 696
## 
## Available components:
##  [1] "sample"     "medoids"    "i.med"      "clustering" "objective" 
##  [6] "clusinfo"   "diss"       "call"       "silinfo"    "data"
# Cluster membership from the CLARA fit on the PCA features. The component is
# called "clustering" (see the component list printed with the fit); spelling
# it out avoids relying on partial matching of `$cluster`.
pca.clara.clust <- pca.clara$clustering

# Silhouette widths of the CLARA partition on the PCA features, then the plot
pca.sil.clara <- silhouette(pca.clara.clust, dist(pca.features))
fviz_silhouette(pca.sil.clara, xlab = "CLARA")
##   cluster size ave.sil.width
## 1       1  451          0.80
## 2       2  232          0.41

# Visualization of cluster analysis results (3D scatterplot)
# The first three PCA dimensions are plotted; each sphere is coloured by the
# CLARA cluster of its observation.
# NOTE(review): pch is kept from the original call, but with type = "s" the
# observations are drawn as spheres, so the real class is presumably not
# visible in the plot -- confirm.
plot3d(pca.features$bcw.after.pca.x...1.,
       pca.features$bcw.after.pca.x...2.,
       pca.features$bcw.after.pca.x...3.,
       col = pca.clara.clust,
       pch = as.numeric(bcw.real.class.labels), size = 1, type = "s",
       xlab = "Dim_1", ylab = "Dim_2", zlab = "Dim_3")
# The legend needs exactly one colour and one symbol per entry. The
# per-observation vectors passed before did not correspond to the two entries,
# and the colours encode cluster membership rather than the real class, so the
# entries are built from the distinct cluster ids.
legend3d("topright",
         legend = paste("cluster", sort(unique(pca.clara.clust))),
         pch = 16, col = sort(unique(pca.clara.clust)),
         cex = 1, inset = c(0.02))
#snapshot3d(filename = '3dplot.png', fmt = 'png')
## AGNES
## FOR ALL FEATURES
# Linkage methods to compare for AGNES; every element is named after itself
# so the coefficients returned below are labelled with the method
m <- c(average = "average", single = "single", complete = "complete")

# Agglomerative coefficient of AGNES fitted to all features with linkage `x`
ac <- function(x) {
  fit <- agnes(bcw.features, method = x)
  fit$ac
}
map_dbl(m, ac)
##   average    single  complete 
## 0.8717631 0.7973720 0.9189242
# AGNES with average linkage on all features, and its dendrogram
bcw.agnes.avg <- agnes(x = bcw.features, method = "average")
pltree(
  bcw.agnes.avg,
  main = "Dendrogram of AGNES with Average Linkage",
  cex = 0.6,
  hang = -1
)

# Cut the tree into k = 2 clusters and show the membership vector
bcw.agnes.avg.k2 <- cutree(bcw.agnes.avg, k = 2)
print(bcw.agnes.avg.k2)
##   [1] 1 2 1 2 1 2 2 1 1 1 1 1 1 1 2 1 1 1 2 1 2 2 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1
##  [38] 2 2 1 2 1 2 1 2 1 1 2 1 1 2 2 2 1 2 1 2 1 1 1 2 1 1 1 1 2 2 1 1 2 1 2 1 1
##  [75] 1 1 1 1 1 1 1 1 2 2 2 2 1 1 1 1 1 1 1 1 1 1 2 2 2 1 1 1 2 1 2 2 1 2 1 2 2
## [112] 2 1 1 1 2 1 1 1 1 2 2 2 1 2 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 2
## [149] 2 1 1 2 1 1 2 2 1 1 1 1 2 2 1 1 1 1 1 2 2 2 1 2 1 2 1 1 1 2 2 1 2 2 2 1 2
## [186] 2 1 1 1 1 2 1 1 1 2 2 1 1 1 2 2 1 1 1 2 2 1 2 2 2 1 1 2 1 1 2 1 2 2 1 2 2
## [223] 1 2 2 2 1 2 1 2 2 2 2 1 1 1 1 1 1 2 2 1 1 2 2 2 2 2 1 1 1 2 2 2 2 2 1 1 2
## [260] 2 2 1 2 1 2 1 1 1 1 1 1 1 1 2 2 2 2 2 1 1 2 1 1 2 2 1 1 2 2 1 2 1 2 2 1 1
## [297] 2 1 1 1 1 1 1 2 2 1 2 2 1 2 1 1 2 1 2 2 2 1 1 2 2 1 2 1 1 2 2 1 1 1 2 1 1
## [334] 1 1 2 1 1 1 2 1 1 1 2 2 1 2 2 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1
## [371] 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 2 1 2 1 1 1 1 2
## [408] 1 1 1 2 1 2 1 1 1 1 1 1 2 2 2 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 2 2 1
## [445] 1 1 1 1 1 1 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 2 1 1 1 2 1 1 1 2 1 2 1 1
## [482] 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 2 2 1 1 1 2 1 1 1 2 1 1 1 1 1 1 2 1 1
## [519] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 2
## [556] 2 2 1 1 2 1 1 1 1 1 1 2 2 1 1 1 2 1 2 1 2 2 2 1 2 1 1 1 1 1 1 1 1 2 2 2 1
## [593] 1 2 1 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1
## [630] 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 2 2 1 1 1 1 1 1 1 1 1 2 2
## [667] 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 2 2
# Number of observations in each of the two clusters cut from the AGNES tree
table(bcw.agnes.avg.k2)
## bcw.agnes.avg.k2
##   1   2 
## 467 216
# Silhouette widths of the two-cluster AGNES cut on all features, then the plot
bcw.sil.agnes <- silhouette(
  x = bcw.agnes.avg.k2,
  dist = dist(bcw.features)
)
fviz_silhouette(sil.obj = bcw.sil.agnes, xlab = "AGNES")
##   cluster size ave.sil.width
## 1       1  467          0.73
## 2       2  216          0.29

# Standard dendrogram of the average-linkage AGNES tree (all features)
fviz_dend(
  bcw.agnes.avg,
  main = "Dendrogram of AGNES with Average Linkage",
  cex = 0.4
)

# Same tree with the k = 2 clusters marked
fviz_dend(bcw.agnes.avg, cex = 0.4, k = 2)

# Circular layout of the tree with the k = 2 clusters marked
fviz_dend(
  bcw.agnes.avg,
  k = 2,
  type = "circular",
  main = "Dendrogram of AGNES with Average Linkage",
  cex = 0.4
)

# Visualization of cluster analysis results (3D scatterplot)
# All three axes are read from bcw.features (the data that was clustered) so
# that the coordinates and the cluster labels refer to the same rows; each
# sphere is coloured by its AGNES cluster.
# NOTE(review): pch is kept from the original call, but with type = "s" the
# observations are drawn as spheres, so the real class is presumably not
# visible in the plot -- confirm.
plot3d(bcw.features$uniformity_cell_size,
       bcw.features$uniformity_cell_shape,
       bcw.features$clump_thickness,
       col = bcw.agnes.avg.k2,
       pch = as.numeric(bcw.real.class.labels), size = 1, type = "s",
       xlab = "uniformity_cell_size", ylab = "uniformity_cell_shape",
       zlab = "clump thickness")
# The legend needs exactly one colour and one symbol per entry. The
# per-observation vectors passed before did not correspond to the two entries,
# and the colours encode cluster membership rather than the real class, so the
# entries are built from the distinct cluster ids.
legend3d("topright",
         legend = paste("cluster", sort(unique(bcw.agnes.avg.k2))),
         pch = 16, col = sort(unique(bcw.agnes.avg.k2)),
         cex = 1, inset = c(0.02))


## FOR PCA FEATURES
# Linkage methods to compare for AGNES; every element is named after itself
# so the coefficients returned below are labelled with the method
m <- c(average = "average", single = "single", complete = "complete")

# Agglomerative coefficient of AGNES fitted to the PCA features with linkage
# `x` (this replaces any earlier definition of `ac`)
ac <- function(x) {
  fit <- agnes(pca.features, method = x)
  fit$ac
}
map_dbl(m, ac)
##   average    single  complete 
## 0.9692076 0.9015951 0.9790112
# AGNES with average linkage on the PCA features, and its dendrogram
pca.agnes.avg <- agnes(x = pca.features, method = "average")
pltree(
  pca.agnes.avg,
  main = "Dendrogram of AGNES with Average Linkage - PCA",
  cex = 0.6,
  hang = -1
)

# Cut the tree into k = 2 clusters and show the membership vector
pca.agnes.avg.k2 <- cutree(pca.agnes.avg, k = 2)
print(pca.agnes.avg.k2)
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1
##  [75] 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1
## [112] 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1
## [149] 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1
## [186] 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [223] 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1
## [260] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1
## [297] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [334] 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [371] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [408] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [445] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1
## [482] 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [519] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [556] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [593] 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1
## [630] 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1
## [667] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# Number of observations in each of the two clusters cut from the PCA AGNES tree
table(pca.agnes.avg.k2)
## pca.agnes.avg.k2
##   1   2 
## 650  33
# Silhouette widths of the two-cluster AGNES cut on the PCA features, then the
# plot
pca.sil.agnes <- silhouette(
  x = pca.agnes.avg.k2,
  dist = dist(pca.features)
)
fviz_silhouette(sil.obj = pca.sil.agnes, xlab = "AGNES")
##   cluster size ave.sil.width
## 1       1  650          0.54
## 2       2   33          0.60

# Standard dendrogram of the average-linkage AGNES tree (PCA features)
fviz_dend(
  pca.agnes.avg,
  main = "Dendrogram of AGNES with Average Linkage - PCA",
  cex = 0.4
)

# Same tree with the k = 2 clusters marked
fviz_dend(pca.agnes.avg, cex = 0.4, k = 2)

# Circular layout of the tree with the k = 2 clusters marked
fviz_dend(
  pca.agnes.avg,
  k = 2,
  type = "circular",
  main = "Dendrogram of AGNES with Average Linkage - PCA",
  cex = 0.4
)

# Visualization of cluster analysis results (3D scatterplot)
# The first three PCA dimensions are plotted; each sphere is coloured by the
# AGNES cluster of its observation.
# NOTE(review): pch is kept from the original call, but with type = "s" the
# observations are drawn as spheres, so the real class is presumably not
# visible in the plot -- confirm.
plot3d(pca.features$bcw.after.pca.x...1.,
       pca.features$bcw.after.pca.x...2.,
       pca.features$bcw.after.pca.x...3.,
       col = pca.agnes.avg.k2,
       pch = as.numeric(bcw.real.class.labels), size = 1, type = "s",
       xlab = "Dim_1", ylab = "Dim_2", zlab = "Dim_3")
# The legend needs exactly one colour and one symbol per entry. The
# per-observation vectors passed before did not correspond to the two entries
# (the membership vector starts with two observations of the same cluster),
# and the colours encode cluster membership rather than the real class, so the
# entries are built from the distinct cluster ids.
legend3d("topright",
         legend = paste("cluster", sort(unique(pca.agnes.avg.k2))),
         pch = 16, col = sort(unique(pca.agnes.avg.k2)),
         cex = 1, inset = c(0.02))
## DIANA
# DIANA (divisive hierarchical clustering) on the complete feature set; the
# second statement prints the `dc` component of the fit
bcw.diana <- diana(x = bcw.features)
bcw.diana[["dc"]]
## [1] 0.9092248
# Dendrogram of the DIANA tree with boxes drawn around the k = 2 clusters
pltree(
  bcw.diana,
  main = "Dendrogram of diana",
  cex = 0.6,
  hang = -1
)
rect.hclust(tree = bcw.diana, k = 2, border = 2:10)

# Membership vector of the two-cluster cut
bcw.diana.clust <- cutree(tree = bcw.diana, k = 2)

# Silhouette widths of that cut on all features, then the silhouette plot
bcw.sil.diana <- silhouette(
  x = bcw.diana.clust,
  dist = dist(bcw.features)
)
fviz_silhouette(sil.obj = bcw.sil.diana, xlab = "DIANA")
##   cluster size ave.sil.width
## 1       1  472          0.72
## 2       2  211          0.31

# Cluster plot of the two-cluster DIANA partition on all features
fviz_cluster(
  object = list(data = bcw.features, cluster = bcw.diana.clust)
)

# DIANA dendrogram with the k = 2 clusters marked
fviz_dend(bcw.diana, cex = 0.4, k = 2)

# Circular layout of the same tree
fviz_dend(
  bcw.diana,
  k = 2,
  type = "circular",
  cex = 0.4
)

# Visualization of cluster analysis results (3D scatterplot)
# All three axes are read from bcw.features (the data that was clustered) so
# that the coordinates and the cluster labels refer to the same rows; each
# sphere is coloured by its DIANA cluster.
# NOTE(review): pch is kept from the original call, but with type = "s" the
# observations are drawn as spheres, so the real class is presumably not
# visible in the plot -- confirm.
plot3d(bcw.features$uniformity_cell_size,
       bcw.features$uniformity_cell_shape,
       bcw.features$clump_thickness,
       col = bcw.diana.clust,
       pch = as.numeric(bcw.real.class.labels), size = 1, type = "s",
       xlab = "uniformity_cell_size", ylab = "uniformity_cell_shape",
       zlab = "clump thickness")
# The legend needs exactly one colour and one symbol per entry. The
# per-observation vectors passed before did not correspond to the two entries,
# and the colours encode cluster membership rather than the real class, so the
# entries are built from the distinct cluster ids.
legend3d("topright",
         legend = paste("cluster", sort(unique(bcw.diana.clust))),
         pch = 16, col = sort(unique(bcw.diana.clust)),
         cex = 1, inset = c(0.02))


# DIANA (divisive hierarchical clustering) on the PCA features; the second
# statement prints the `dc` component of the fit
pca.diana <- diana(x = pca.features)
pca.diana[["dc"]]
## [1] 0.9757748
# Dendrogram of the PCA DIANA tree with boxes drawn around the k = 2 clusters
pltree(
  pca.diana,
  main = "Dendrogram of diana",
  cex = 0.6,
  hang = -1
)
rect.hclust(tree = pca.diana, k = 2, border = 2:10)

# Membership vector of the two-cluster cut
pca.diana.clust <- cutree(tree = pca.diana, k = 2)

# Silhouette widths of that cut on the PCA features, then the silhouette plot
pca.sil.diana <- silhouette(
  x = pca.diana.clust,
  dist = dist(pca.features)
)
fviz_silhouette(sil.obj = pca.sil.diana, xlab = "DIANA")
##   cluster size ave.sil.width
## 1       1  466          0.77
## 2       2  217          0.44

#fviz_cluster(list(data = pca.features, cluster = pca.diana.clust))
fviz_dend(pca.diana, k=2, cex=0.4)

fviz_dend(pca.agnes.avg, type="circular", cex=0.4, k=2,  main="Dendrogram of AGNES with Average Linkage - PCA")

# 3D scatterplot of the first three PCA dimensions; every point is coloured by
# its DIANA (k = 2) cluster id.
# NOTE(review): legend3d() below receives the full per-observation `pch` and
# `col` vectors although the legend has only two entries -- confirm that the
# symbols/colours shown really correspond to "malignant" and "benign".
plot3d(
  pca.features[["bcw.after.pca.x...1."]],
  pca.features[["bcw.after.pca.x...2."]],
  pca.features[["bcw.after.pca.x...3."]],
  col = pca.diana.clust,
  pch = as.numeric(bcw.real.class.labels),
  size = 1,
  type = "s",
  xlab = "Dim_1",
  ylab = "Dim_2",
  zlab = "Dim_3"
)
legend3d(
  "topright",
  legend = c("malignant", "benign"),
  pch = as.numeric(bcw.real.class.labels),
  col = pca.diana.clust,
  cex = 1,
  inset = c(0.02)
)
## External Cluster Validation
library(e1071)
library("fpc")

# Cluster assignments of every method: the five all-features fits first,
# followed by the five PCA-based fits.
clust.results <- list(
  bcw.kmeans.labels, bcw.cluster.labels, bcw.agnes.avg.k2, bcw.diana.clust,
  bcw.clara.clust,
  pca.kmeans.labels, pca.cluster.labels, pca.agnes.avg.k2, pca.diana.clust,
  pca.clara.clust
)
# Agreement of each clustering with the real class labels, rounded to 3 digits.
partition.agreement <- numeric(10)
for (j in seq_along(clust.results)) {
  method.labels <- clust.results[[j]]
  # The return value is discarded; presumably the call is kept for the matching
  # summary it prints.
  matchClasses(table(method.labels, bcw.real.class.labels), method = "exact")
  part.agreement <- compareMatchedClasses(method.labels, bcw.real.class.labels,
                                          method = "exact")$diag
  print(part.agreement)
  partition.agreement[j] <- round(part.agreement, 3)
}
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 96.05 %
##           [,1]
## [1,] 0.9604685
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 95.9 %
##           [,1]
## [1,] 0.9590044
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 94.29 %
##          [,1]
## [1,] 0.942899
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 94.14 %
##           [,1]
## [1,] 0.9414348
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 95.9 %
##           [,1]
## [1,] 0.9590044
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 96.05 %
##           [,1]
## [1,] 0.9604685
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 96.05 %
##           [,1]
## [1,] 0.9604685
## Direct agreement: 1 of 2 pairs
## Iterations for permutation matching: 1 
## Cases in matched pairs: 69.84 %
##           [,1]
## [1,] 0.6983895
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 94.73 %
##           [,1]
## [1,] 0.9472914
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 96.05 %
##           [,1]
## [1,] 0.9604685
# Bar chart of each method's rounded partition agreement with the real class
# labels; the value is printed just above its bar.
pat.agree.val <- data.frame(
  Methods = c("KMeans", "PAM", "AGNES", "DIANA", "CLARA",
              "KMeans_PCA", "PAM_PCA", "AGNES_PCA", "DIANA_PCA", "CLARA_PCA"),
  Partition_Agreement = partition.agreement
)
ggplot(data = pat.agree.val,
       mapping = aes(x = Methods, y = Partition_Agreement, fill = Methods)) +
  geom_col() +
  geom_text(mapping = aes(label = Partition_Agreement), vjust = -0.3, size = 3.5)

## PARTITION AGREEMENT: REAL LABELS AND MODEL LABELS COMPARED
# Builds three summaries over the ten clusterings:
#   pat.res.matrix      - pairwise partition agreement between methods; the
#                         diagonal holds each method's agreement with the
#                         real class labels
#   pat.resprand.matrix - corrected Rand index of each method vs the real labels
#   acc.res.vector      - accuracy of each method vs the real labels
method.names <- c("KMeans", "PAM", "AGNES", "DIANA", "CLARA", "KMeans_PCA", "PAM_PCA",
                  "AGNES_PCA", "DIANA_PCA", "CLARA_PCA")
clust.resultsss <- data.frame(bcw.kmeans.labels,bcw.cluster.labels,bcw.agnes.avg.k2,bcw.diana.clust,bcw.clara.clust,
                      pca.kmeans.labels,pca.cluster.labels,pca.agnes.avg.k2,pca.diana.clust,pca.clara.clust)
n.methods <- ncol(clust.resultsss)
pat.res.matrix <- matrix(0, nrow = n.methods, ncol = n.methods,
                         dimnames = list(method.names, method.names))
pat.resprand.matrix <- matrix(0, nrow = n.methods, ncol = 1) #FOR THE RAND INDEX
acc.res.vector <- matrix(0, nrow = n.methods, ncol = 1) #ACCURACY
species <- as.numeric(bcw.real.class.labels)

# Distance matrices are loop-invariant, so compute each one only once.
dist.all.features <- dist(bcw.features)
dist.pca.features <- dist(pca.features)

for (i in seq_len(n.methods)) {
  for (j in seq_len(n.methods)) {
    # Diagonal: method i against the real labels; elsewhere: method i against method j.
    reference <- if (i == j) bcw.real.class.labels else clust.resultsss[, j]
    part.agreement <- compareMatchedClasses(clust.resultsss[, i], reference, method = "exact")$diag
    pat.res.matrix[i, j] <- round(part.agreement, 3)
  }

  # Cluster ids are arbitrary, so a clustering can match the classes perfectly
  # while carrying the opposite ids. With two clusters and two classes the
  # accuracy under swapped ids is 1 - accuracy, hence the maximum of the two is
  # reported for every method (instead of flipping one hard-coded method).
  # The recorded acc.res.vector output further down (0.040 in rows 1 and 6)
  # predates this correction.
  raw.accuracy <- mean(species == clust.resultsss[, i])
  acc.res.vector[i] <- round(max(raw.accuracy, 1 - raw.accuracy), 3)

  # Columns 1-5 were clustered on all features, columns 6-10 on the PCA
  # features; pass the distance matrix that belongs to the clustering.
  method.dist <- if (i < 6) dist.all.features else dist.pca.features
  clust_stats <- cluster.stats(d = method.dist, species, clust.resultsss[, i])
  pat.resprand.matrix[i] <- round(clust_stats$corrected.rand, 3)
}
library(pheatmap)
# Heatmap of the agreement matrix with the values printed in each cell; rows
# and columns keep their given order (no clustering of either axis).
pheatmap(
  pat.res.matrix,
  display_numbers = TRUE,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  number_format = "%.3f"
)

pat.res.matrix # PARTITION AGREEMENT (diagonal: agreement with the real labels)
##            KMeans   PAM AGNES DIANA CLARA KMeans_PCA PAM_PCA AGNES_PCA
## KMeans      0.960 0.996 0.977 0.972 0.996      0.982   0.988     0.712
## PAM         0.996 0.959 0.978 0.974 1.000      0.978   0.984     0.713
## AGNES       0.977 0.978 0.943 0.972 0.978      0.959   0.965     0.732
## DIANA       0.972 0.974 0.972 0.941 0.974      0.969   0.969     0.739
## CLARA       0.996 1.000 0.978 0.974 0.959      0.978   0.984     0.713
## KMeans_PCA  0.982 0.978 0.959 0.969 0.978      0.960   0.994     0.709
## PAM_PCA     0.988 0.984 0.965 0.969 0.984      0.994   0.960     0.709
## AGNES_PCA   0.712 0.713 0.732 0.739 0.713      0.709   0.709     0.698
## DIANA_PCA   0.981 0.980 0.975 0.991 0.980      0.978   0.978     0.731
## CLARA_PCA   0.988 0.984 0.965 0.969 0.984      0.994   1.000     0.709
##            DIANA_PCA CLARA_PCA
## KMeans         0.981     0.988
## PAM            0.980     0.984
## AGNES          0.975     0.965
## DIANA          0.991     0.969
## CLARA          0.980     0.984
## KMeans_PCA     0.978     0.994
## PAM_PCA        0.978     1.000
## AGNES_PCA      0.731     0.709
## DIANA_PCA      0.947     0.978
## CLARA_PCA      0.978     0.960
acc.res.vector #ACCURACY MEASURE: one row per method, in the column order of clust.resultsss
##        [,1]
##  [1,] 0.040
##  [2,] 0.959
##  [3,] 0.943
##  [4,] 0.941
##  [5,] 0.959
##  [6,] 0.040
##  [7,] 0.960
##  [8,] 0.698
##  [9,] 0.947
## [10,] 0.960